@ishlabs/cli 0.24.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ask.js +3 -3
- package/dist/commands/iteration.js +1 -1
- package/dist/commands/study-analyze.js +1 -1
- package/dist/commands/study-run.js +80 -12
- package/dist/commands/study.js +11 -7
- package/dist/lib/alias-store.js +1 -1
- package/dist/lib/api-client.d.ts +2 -0
- package/dist/lib/docs.js +57 -42
- package/dist/lib/local-sim/actions.d.ts +10 -2
- package/dist/lib/local-sim/actions.js +16 -11
- package/dist/lib/local-sim/adb.d.ts +103 -0
- package/dist/lib/local-sim/adb.js +352 -0
- package/dist/lib/local-sim/android.d.ts +111 -0
- package/dist/lib/local-sim/android.js +499 -0
- package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
- package/dist/lib/local-sim/apk-manifest.js +210 -0
- package/dist/lib/local-sim/browser.d.ts +22 -0
- package/dist/lib/local-sim/browser.js +65 -0
- package/dist/lib/local-sim/coordinates.d.ts +69 -0
- package/dist/lib/local-sim/coordinates.js +59 -0
- package/dist/lib/local-sim/device.d.ts +143 -0
- package/dist/lib/local-sim/device.js +152 -0
- package/dist/lib/local-sim/ios.d.ts +168 -0
- package/dist/lib/local-sim/ios.js +546 -0
- package/dist/lib/local-sim/loop.d.ts +14 -2
- package/dist/lib/local-sim/loop.js +166 -73
- package/dist/lib/local-sim/native-a11y.d.ts +97 -0
- package/dist/lib/local-sim/native-a11y.js +384 -0
- package/dist/lib/local-sim/simctl.d.ts +85 -0
- package/dist/lib/local-sim/simctl.js +273 -0
- package/dist/lib/local-sim/types.d.ts +37 -2
- package/dist/lib/local-sim/upload.d.ts +1 -1
- package/dist/lib/local-sim/upload.js +9 -6
- package/dist/lib/output.js +58 -12
- package/dist/lib/skill-content.js +10 -9
- package/package.json +2 -1
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async wrappers over the `adb` CLI for the native-Android sim path.
|
|
3
|
+
*
|
|
4
|
+
* One emulator/device is assumed (the lead coordinates a single shared
|
|
5
|
+
* emulator). Every call shells out to a resolved `adb` binary; binary output
|
|
6
|
+
* (screencap) is captured without a utf-8 round-trip so PNG bytes survive.
|
|
7
|
+
*
|
|
8
|
+
* Coordinate space: `adb shell screencap` and `adb shell input tap` share ONE
|
|
9
|
+
* pixel space — there is NO DPR correction. The native sim de-normalizes the
|
|
10
|
+
* backend's 0-1000 coordinates against the screencap pixel size and taps
|
|
11
|
+
* directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
|
|
12
|
+
*/
|
|
13
|
+
export declare class AdbError extends Error {
|
|
14
|
+
constructor(message: string);
|
|
15
|
+
}
|
|
16
|
+
/** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
|
|
17
|
+
export declare function adb(args: string[], timeoutMs?: number): Promise<string>;
|
|
18
|
+
/** Run `adb shell <args>` and return trimmed stdout. */
|
|
19
|
+
export declare function adbShell(args: string[], timeoutMs?: number): Promise<string>;
|
|
20
|
+
/**
|
|
21
|
+
* Capture the current screen as raw PNG bytes via `adb exec-out screencap -p`.
|
|
22
|
+
* `exec-out` (not `shell`) avoids the CRLF translation that corrupts binary
|
|
23
|
+
* output. Returns the PNG buffer at full device resolution.
|
|
24
|
+
*/
|
|
25
|
+
export declare function screencapPng(): Promise<Buffer>;
|
|
26
|
+
/** Assert exactly one device/emulator is in the `device` state. */
|
|
27
|
+
export declare function requireOneDevice(): Promise<void>;
|
|
28
|
+
export declare function inputTap(x: number, y: number): Promise<void>;
|
|
29
|
+
export declare function inputSwipe(x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;
|
|
30
|
+
/**
|
|
31
|
+
* A drag GRABS an element and drops it elsewhere — press, HOLD to pick up,
|
|
32
|
+
* move, release. We use `input draganddrop`, NOT a slow `input swipe`: a swipe
|
|
33
|
+
* never holds at the start, so on the surfaces a drag actually targets
|
|
34
|
+
* (launcher icons, reorderable list rows — all `long-clickable`) the framework
|
|
35
|
+
* reads a slow swipe as a directional SWIPE (e.g. it opens the app drawer)
|
|
36
|
+
* instead of picking the element up. `draganddrop` dwells at the press point
|
|
37
|
+
* first, triggering the long-press pickup, then moves and releases — verified
|
|
38
|
+
* on-device to actually rearrange a launcher icon where the slow swipe didn't.
|
|
39
|
+
* Requires API 30+ (`input draganddrop`); on the dev emulators/sim devices the
|
|
40
|
+
* native driver targets, that's always present. Coordinates are screencap
|
|
41
|
+
* pixels (the same space as tap/swipe — no DPR correction).
|
|
42
|
+
*/
|
|
43
|
+
export declare function inputDrag(x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;
|
|
44
|
+
/** A long-press is a zero-distance swipe held for `durationMs`. */
|
|
45
|
+
export declare function inputLongPress(x: number, y: number, durationMs?: number): Promise<void>;
|
|
46
|
+
export declare function pressKeyEvent(keyevent: string): Promise<void>;
|
|
47
|
+
/**
|
|
48
|
+
* Force a device orientation. We first disable auto-rotation
|
|
49
|
+
* (`accelerometer_rotation 0`) — otherwise the sensor immediately overrides
|
|
50
|
+
* our fixed `user_rotation`. `user_rotation` is 0=portrait, 1=landscape (90°).
|
|
51
|
+
*/
|
|
52
|
+
export declare function setUserRotation(orientation: "portrait" | "landscape"): Promise<void>;
|
|
53
|
+
export declare const ADB_KEYBOARD_PKG = "com.android.adbkeyboard";
|
|
54
|
+
/** True if the ADBKeyboard IME is installed on the device. */
|
|
55
|
+
export declare function isAdbKeyboardInstalled(): Promise<boolean>;
|
|
56
|
+
/** Currently-selected IME id (so we can restore it after the run). */
|
|
57
|
+
export declare function currentIme(): Promise<string | null>;
|
|
58
|
+
/** Select an IME by its component id. */
|
|
59
|
+
export declare function setIme(imeId: string): Promise<void>;
|
|
60
|
+
/** Reset the IME to the system default (used when we had no prior IME to restore). */
|
|
61
|
+
export declare function resetIme(): Promise<void>;
|
|
62
|
+
/** Make ADBKeyboard the active IME. */
|
|
63
|
+
export declare function enableAdbKeyboard(): Promise<void>;
|
|
64
|
+
/**
|
|
65
|
+
* Type text into the focused field via ADBKeyboard's base64 broadcast.
|
|
66
|
+
* base64 keeps spaces/unicode/quotes intact across the adb shell boundary.
|
|
67
|
+
* Note: the text transits as a base64 argv arg, so it's briefly visible in the
|
|
68
|
+
* device-side process list (`ps`) — fine for sim input, not a secret channel
|
|
69
|
+
* (context secrets are resolved here but this is local-dev/emulator only).
|
|
70
|
+
*/
|
|
71
|
+
export declare function adbKeyboardType(text: string): Promise<void>;
|
|
72
|
+
/** Clear the focused field (ADBKeyboard ADB_CLEAR_TEXT broadcast). */
|
|
73
|
+
export declare function adbKeyboardClear(): Promise<void>;
|
|
74
|
+
/**
|
|
75
|
+
* Read width/height from a PNG buffer's IHDR chunk. The IHDR is always the
|
|
76
|
+
* first chunk: 8-byte signature, 4-byte length, 4-byte "IHDR", then width and
|
|
77
|
+
* height as big-endian uint32 at byte offsets 16 and 20. Avoids pulling in an
|
|
78
|
+
* image library just to learn the screencap's pixel size.
|
|
79
|
+
*/
|
|
80
|
+
export declare function pngDimensions(png: Buffer): {
|
|
81
|
+
width: number;
|
|
82
|
+
height: number;
|
|
83
|
+
};
|
|
84
|
+
export declare function forceStop(pkg: string): Promise<void>;
|
|
85
|
+
/** Launch the app's default launchable activity via monkey (no activity name needed). */
|
|
86
|
+
export declare function launchApp(pkg: string): Promise<void>;
|
|
87
|
+
export declare function installApk(apkPath: string): Promise<void>;
|
|
88
|
+
export declare function isPackageInstalled(pkg: string): Promise<boolean>;
|
|
89
|
+
/**
|
|
90
|
+
* Dump the current view hierarchy as uiautomator XML and return it. Mirrors the
|
|
91
|
+
* oracle's `ui_dump`: uiautomator fails transiently with "window in transition"
|
|
92
|
+
* (right after a UI change, writes nothing) or "could not get idle state" (a
|
|
93
|
+
* view animates forever), so we retry a few times, nudging the device awake
|
|
94
|
+
* between tries since a screen-off device never goes idle-with-content.
|
|
95
|
+
*
|
|
96
|
+
* Uses `exec-out uiautomator dump /dev/tty` so the XML comes back on stdout in
|
|
97
|
+
* one shot (no pull); falls back to dump-to-file + pull if the device writes the
|
|
98
|
+
* "dumped to" line to a path instead. Throws AdbError if every attempt fails so
|
|
99
|
+
* the caller can degrade to the vision path.
|
|
100
|
+
*/
|
|
101
|
+
export declare function dumpUiautomatorXml(): Promise<string>;
|
|
102
|
+
/** All installed package names (the set of `package:<name>` lines). */
|
|
103
|
+
export declare function listPackages(): Promise<Set<string>>;
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async wrappers over the `adb` CLI for the native-Android sim path.
|
|
3
|
+
*
|
|
4
|
+
* One emulator/device is assumed (the lead coordinates a single shared
|
|
5
|
+
* emulator). Every call shells out to a resolved `adb` binary; binary output
|
|
6
|
+
* (screencap) is captured without a utf-8 round-trip so PNG bytes survive.
|
|
7
|
+
*
|
|
8
|
+
* Coordinate space: `adb shell screencap` and `adb shell input tap` share ONE
|
|
9
|
+
* pixel space — there is NO DPR correction. The native sim de-normalizes the
|
|
10
|
+
* backend's 0-1000 coordinates against the screencap pixel size and taps
|
|
11
|
+
* directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
|
|
12
|
+
*/
|
|
13
|
+
import { execFile } from "node:child_process";
|
|
14
|
+
import { existsSync } from "node:fs";
|
|
15
|
+
import { promisify } from "node:util";
|
|
16
|
+
const execFileAsync = promisify(execFile);
|
|
17
|
+
// Locate the `adb` executable without trusting the caller's PATH (mirrors
// scripts/mobile-e2e/lib.sh). Lookup order, first existing file wins:
//   1. ISH_ADB / ADB environment override
//   2. Homebrew's android-platform-tools install
//   3. <ANDROID_HOME | ANDROID_SDK_ROOT>/platform-tools/adb
// If none exists we return the bare command name and let PATH resolution (or a
// clear spawn error) take over.
function resolveAdb() {
    const sdkRoot = process.env.ANDROID_HOME || process.env.ANDROID_SDK_ROOT;
    const candidates = [
        process.env.ISH_ADB || process.env.ADB,
        "/opt/homebrew/bin/adb",
        sdkRoot ? `${sdkRoot}/platform-tools/adb` : undefined,
    ];
    for (const candidate of candidates) {
        // Unset/empty candidates are skipped; set-but-missing paths fall through.
        if (candidate && existsSync(candidate)) {
            return candidate;
        }
    }
    return "adb";
}
// Resolved once at module load; every wrapper below shells out to this binary.
const ADB = resolveAdb();
// Default per-command timeout for ordinary adb invocations.
const DEFAULT_TIMEOUT_MS = 30_000;
// screencap on a cold emulator frame can be slow; give it generous headroom.
const SCREENCAP_TIMEOUT_MS = 30_000;
// 16 MB covers a full-resolution PNG frame from a phone-sized emulator.
const SCREENCAP_MAX_BUFFER = 16 * 1024 * 1024;
|
|
42
|
+
/**
 * Error type thrown by every wrapper in this module, so callers can tell
 * adb/device failures apart from other errors via `instanceof AdbError` or
 * `err.name === "AdbError"`.
 */
export class AdbError extends Error {
    name = "AdbError";
    constructor(message) {
        super(message);
    }
}
|
|
48
|
+
/**
 * Invoke the resolved adb binary with `args` and resolve to its stdout with
 * surrounding whitespace removed.
 *
 * @param args argv passed to adb verbatim (no shell interpolation on the host).
 * @param timeoutMs kill the child after this many milliseconds.
 * @throws AdbError wrapping any spawn failure, non-zero exit, or timeout; the
 *   message includes the joined argument list.
 */
export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const execOptions = {
        timeout: timeoutMs,
        // Cap on captured stdout/stderr for ordinary (text) commands.
        maxBuffer: 4 * 1024 * 1024,
    };
    try {
        const result = await execFileAsync(ADB, args, execOptions);
        return result.stdout.trim();
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new AdbError(`adb ${args.join(" ")} failed: ${reason}`);
    }
}
|
|
62
|
+
/**
 * Convenience wrapper: prefixes `args` with `shell` and delegates to `adb`,
 * returning the trimmed stdout of the device-side command.
 */
export async function adbShell(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const shellArgs = ["shell"].concat(args);
    return adb(shellArgs, timeoutMs);
}
|
|
66
|
+
/**
 * Grab the current frame with `adb exec-out screencap -p` and resolve to the
 * raw PNG bytes. `exec-out` is used instead of `shell` because the latter can
 * apply CRLF translation that corrupts binary output, and stdout is requested
 * as a Buffer so the bytes never pass through a string decode.
 *
 * @throws AdbError if the command fails, times out, or exceeds the buffer cap.
 */
export async function screencapPng() {
    const captureArgs = ["exec-out", "screencap", "-p"];
    const captureOptions = {
        timeout: SCREENCAP_TIMEOUT_MS,
        maxBuffer: SCREENCAP_MAX_BUFFER,
        encoding: "buffer",
    };
    try {
        const result = await execFileAsync(ADB, captureArgs, captureOptions);
        return result.stdout;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new AdbError(`adb exec-out screencap failed: ${reason}`);
    }
}
|
|
85
|
+
/**
 * Verify that `adb devices` lists exactly one entry in the `device` state.
 *
 * @throws AdbError when adb cannot be run at all, when no device is online, or
 *   when more than one is online (the sim drives a single device).
 */
export async function requireOneDevice() {
    let listing;
    try {
        listing = await adb(["devices"]);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new AdbError(`Could not run adb (looked for "${ADB}"). Is the Android SDK installed and an emulator booted? ${reason}`);
    }
    // Expected shape: a "List of devices attached" header line, then one
    // "<serial>\t<state>" row per device. Only rows in the `device` state count.
    let onlineCount = 0;
    const rows = listing.split("\n").slice(1);
    for (const row of rows) {
        if (row.trim().endsWith("\tdevice")) {
            onlineCount += 1;
        }
    }
    if (onlineCount === 0) {
        throw new AdbError("No Android device/emulator online. Boot one first (e.g. `npm run mobile-e2e-setup`).");
    }
    if (onlineCount > 1) {
        throw new AdbError(`Expected exactly one Android device, found ${onlineCount}. Stop the extras (the sim drives a single device).`);
    }
}
|
|
108
|
+
// --- Input gestures (all in screencap pixel space) ---
|
|
109
|
+
/**
 * Tap once at (`x`, `y`) via `input tap`. Coordinates are screencap pixels and
 * are rounded to the nearest integer before being sent.
 */
export async function inputTap(x, y) {
    const [px, py] = [x, y].map((value) => String(Math.round(value)));
    await adbShell(["input", "tap", px, py]);
}
|
|
112
|
+
/**
 * Swipe from (`x1`, `y1`) to (`x2`, `y2`) over `durationMs` milliseconds via
 * `input swipe`. All values are rounded to integers; coordinates are screencap
 * pixels.
 */
export async function inputSwipe(x1, y1, x2, y2, durationMs = 300) {
    const numericArgs = [x1, y1, x2, y2, durationMs].map((value) => String(Math.round(value)));
    await adbShell(["input", "swipe", ...numericArgs]);
}
|
|
123
|
+
/**
 * Drag-and-drop: press at (`x1`, `y1`), hold to pick the element up, move to
 * (`x2`, `y2`), release.
 *
 * Implemented with `input draganddrop` rather than a slow `input swipe`. A
 * swipe never dwells at its start point, so long-clickable surfaces (launcher
 * icons, reorderable rows) interpret it as a directional swipe — e.g. opening
 * the app drawer — instead of a pickup. `draganddrop` dwells first, which
 * triggers the long-press pickup; this was verified on-device to rearrange a
 * launcher icon where the slow swipe did not.
 *
 * `input draganddrop` requires API 30+, which the dev emulators/sim devices the
 * native driver targets always provide. Coordinates are screencap pixels (same
 * space as tap/swipe — no DPR correction) and are rounded to integers.
 */
export async function inputDrag(x1, y1, x2, y2, durationMs = 800) {
    const numericArgs = [x1, y1, x2, y2, durationMs].map((value) => String(Math.round(value)));
    await adbShell(["input", "draganddrop", ...numericArgs]);
}
|
|
147
|
+
/**
 * Long-press at (`x`, `y`): issued as an `input swipe` whose start and end are
 * the same rounded pixel, held for `durationMs` milliseconds.
 */
export async function inputLongPress(x, y, durationMs = 600) {
    const point = [String(Math.round(x)), String(Math.round(y))];
    const holdMs = String(Math.round(durationMs));
    await adbShell(["input", "swipe", ...point, ...point, holdMs]);
}
|
|
153
|
+
/**
 * Send a single key event through `input keyevent`. `keyevent` is passed to
 * adb unchanged (e.g. "KEYCODE_WAKEUP").
 */
export async function pressKeyEvent(keyevent) {
    const command = ["input", "keyevent", keyevent];
    await adbShell(command);
}
|
|
156
|
+
/**
 * Pin the device to portrait or landscape.
 *
 * Auto-rotation is switched off first (`accelerometer_rotation 0`); otherwise
 * the sensor immediately overrides the fixed `user_rotation` we write next.
 * `user_rotation` is 0 for portrait and 1 for landscape (90°); any value other
 * than "landscape" is treated as portrait.
 */
export async function setUserRotation(orientation) {
    const putSystemSetting = (key, value) => adbShell(["settings", "put", "system", key, value]);
    const rotationValue = orientation === "landscape" ? "1" : "0";
    await putSystemSetting("accelerometer_rotation", "0");
    await putSystemSetting("user_rotation", rotationValue);
}
|
|
165
|
+
// --- Text input via ADBKeyboard ---
|
|
166
|
+
//
|
|
167
|
+
// `adb shell input text` mangles spaces, unicode, and special chars and can't
|
|
168
|
+
// target a specific field. ADBKeyboard (a tiny IME, pkg below) accepts a
|
|
169
|
+
// broadcast carrying base64-encoded UTF-8 and types it into the focused field
|
|
170
|
+
// reliably. We set it as the active IME for the run and restore afterwards.
|
|
171
|
+
export const ADB_KEYBOARD_PKG = "com.android.adbkeyboard";
|
|
172
|
+
const ADB_KEYBOARD_IME = `${ADB_KEYBOARD_PKG}/.AdbIME`;
|
|
173
|
+
/** Resolves to true when the ADBKeyboard package is present on the device. */
export async function isAdbKeyboardInstalled() {
    const installed = await isPackageInstalled(ADB_KEYBOARD_PKG);
    return installed;
}
|
|
177
|
+
/**
 * Read the `default_input_method` secure setting so the caller can restore it
 * after the run. Resolves to the IME id, or null when the setting is empty or
 * the literal string "null".
 */
export async function currentIme() {
    const raw = await adbShell(["settings", "get", "secure", "default_input_method"]);
    const imeId = raw.trim();
    if (imeId === "" || imeId === "null") {
        return null;
    }
    return imeId;
}
|
|
183
|
+
/** Switch the active input method to `imeId` (a component id) via `ime set`. */
export async function setIme(imeId) {
    const command = ["ime", "set", imeId];
    await adbShell(command);
}
|
|
187
|
+
/**
 * Run `ime reset` to return input-method selection to the system default.
 * Used when there was no previously-selected IME to restore.
 */
export async function resetIme() {
    const command = ["ime", "reset"];
    await adbShell(command);
}
|
|
191
|
+
/**
 * Activate the ADBKeyboard IME: `ime enable` followed by `ime set`, in that
 * order, each awaited before the next runs.
 */
export async function enableAdbKeyboard() {
    for (const verb of ["enable", "set"]) {
        await adbShell(["ime", verb, ADB_KEYBOARD_IME]);
    }
}
|
|
196
|
+
/**
 * Type `text` into the currently focused field by broadcasting it to
 * ADBKeyboard (`ADB_INPUT_B64`, extra `msg`).
 *
 * The text is UTF-8 encoded and then base64'd so spaces, unicode and quotes
 * survive the adb shell boundary unchanged.
 *
 * Security note: the payload travels as a base64 argv argument, so it is
 * briefly visible in the device-side process list (`ps`). That is acceptable
 * for sim input but this is not a secret channel (context secrets are resolved
 * here, but the path is local-dev/emulator only).
 */
export async function adbKeyboardType(text) {
    const encoded = Buffer.from(text, "utf-8").toString("base64");
    const broadcast = ["am", "broadcast", "-a", "ADB_INPUT_B64", "--es", "msg", encoded];
    await adbShell(broadcast);
}
|
|
215
|
+
/** Empty the focused field by sending ADBKeyboard's `ADB_CLEAR_TEXT` broadcast. */
export async function adbKeyboardClear() {
    const broadcast = ["am", "broadcast", "-a", "ADB_CLEAR_TEXT"];
    await adbShell(broadcast);
}
|
|
219
|
+
// --- PNG dimensions (dependency-free IHDR read) ---
|
|
220
|
+
/**
 * Extract pixel width/height from a PNG buffer without an image library.
 *
 * Layout relied on: 8-byte PNG signature, then the first chunk (IHDR) with a
 * 4-byte length and 4-byte type, putting width and height as big-endian uint32
 * values at byte offsets 16 and 20.
 *
 * @param png buffer holding at least the first 24 bytes of a PNG.
 * @returns `{ width, height }` in pixels.
 * @throws AdbError when the buffer is shorter than 24 bytes, does not start
 *   with the PNG signature, or declares a zero dimension.
 */
export function pngDimensions(png) {
    const signature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
    const looksLikePng = png.length >= 24 && png.subarray(0, 8).equals(signature);
    if (!looksLikePng) {
        throw new AdbError("screencap did not return a PNG (unexpected screencap output)");
    }
    const [width, height] = [16, 20].map((offset) => png.readUInt32BE(offset));
    if (!(width > 0 && height > 0)) {
        throw new AdbError(`screencap PNG has invalid dimensions ${width}x${height}`);
    }
    return { width, height };
}
|
|
238
|
+
// --- App lifecycle ---
|
|
239
|
+
/** Force-stop the app with package name `pkg` via `am force-stop`. */
export async function forceStop(pkg) {
    const command = ["am", "force-stop", pkg];
    await adbShell(command);
}
|
|
242
|
+
/**
 * Start `pkg` through its default launcher activity.
 *
 * `monkey -p <pkg> -c android.intent.category.LAUNCHER 1` resolves the entry
 * activity itself, so callers never need to know the activity name.
 *
 * @throws AdbError when monkey reports it found nothing to launch.
 */
export async function launchApp(pkg) {
    const monkeyArgs = ["monkey", "-p", pkg, "-c", "android.intent.category.LAUNCHER", "1"];
    const report = await adbShell(monkeyArgs);
    // monkey exits 0 even if the package is missing or has no launchable
    // activity; the only signal is "No activities found to run, monkey
    // aborting." on stdout. Detect it so a bad package fails loudly rather than
    // silently leaving the previous foreground app in place.
    const abortMarkers = [/no activities found to run/i, /aborting/i];
    if (abortMarkers.some((marker) => marker.test(report))) {
        throw new AdbError(`No launchable activity for package "${pkg}" (is it installed?): ${report.trim()}`);
    }
}
|
|
262
|
+
/**
 * Install the apk at `apkPath` with a 180-second timeout.
 *
 * Flags: `-r` reinstall keeping data, `-g` grant runtime permissions, `-t`
 * allow test apks.
 *
 * @throws AdbError when adb fails or its output has no "Success" line.
 */
export async function installApk(apkPath) {
    const installArgs = ["install", "-r", "-g", "-t", apkPath];
    const report = await adb(installArgs, 180_000);
    // `adb install` can exit 0 while printing `Failure [INSTALL_FAILED_*]`, so
    // only an explicit "Success" counts. Anything else must throw; otherwise the
    // run would silently target whatever build was installed before.
    const succeeded = /\bSuccess\b/.test(report);
    if (!succeeded) {
        throw new AdbError(`adb install did not report Success for "${apkPath}": ${report.trim() || "(no output)"}`);
    }
}
|
|
272
|
+
/**
 * Resolve to true when `pkg` is installed on the device.
 *
 * `pm list packages <filter>` matches by substring, so the output is scanned
 * for a line that equals `package:<pkg>` exactly.
 */
export async function isPackageInstalled(pkg) {
    const wantedLine = `package:${pkg}`;
    const listing = await adbShell(["pm", "list", "packages", pkg]);
    for (const line of listing.split("\n")) {
        if (line.trim() === wantedLine) {
            return true;
        }
    }
    return false;
}
|
|
277
|
+
// --- Accessibility tree (uiautomator) ---
|
|
278
|
+
/**
 * Dump the current view hierarchy as uiautomator XML and return it. Mirrors the
 * oracle's `ui_dump`: uiautomator fails transiently with "window in transition"
 * (right after a UI change, writes nothing) or "could not get idle state" (a
 * view animates forever), so we retry a few times, nudging the device awake
 * between tries since a screen-off device never goes idle-with-content.
 *
 * Primary path: `exec-out uiautomator dump /dev/tty`, which streams the XML to
 * stdout in one shot (no pull).
 *
 * Fallback path: if stdout carries the "dumped to" confirmation but no
 * `<hierarchy>` payload, the hierarchy is dumped again to a real file on the
 * device and read back with `exec-out cat`. The file must be written in this
 * step: the primary command targets /dev/tty, so without an explicit
 * dump-to-file there is nothing (or only a stale dump from an earlier run) at
 * that path.
 *
 * @returns the `<hierarchy>…</hierarchy>` XML string.
 * @throws AdbError if every attempt fails, so the caller can degrade to the
 *   vision path. The message carries the last failure reason.
 */
export async function dumpUiautomatorXml() {
    const devPath = "/sdcard/ish-ax.xml";
    const maxAttempts = 3;
    let lastErr = "";
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        try {
            // On success the output contains the `<hierarchy ...>` root; builds also
            // print "UI hierchary dumped to: <path>" (sic — uiautomator's spelling).
            const out = await adb(["exec-out", "uiautomator", "dump", "/dev/tty"]);
            const xml = extractHierarchyXml(out);
            if (xml)
                return xml;
            if (/dumped to/i.test(out)) {
                // stdout did not carry the XML: write the dump to a file, then read it.
                await adbShell(["uiautomator", "dump", devPath]);
                const pulled = await adb(["exec-out", "cat", devPath]);
                const fileXml = extractHierarchyXml(pulled);
                if (fileXml)
                    return fileXml;
            }
            lastErr = out.trim() || "(no hierarchy in output)";
        }
        catch (err) {
            lastErr = err instanceof Error ? err.message : String(err);
        }
        // Nudge the device toward an idle, awake, content-bearing state before the
        // next try. Skipped after the final attempt, where it would only delay the
        // error.
        if (attempt < maxAttempts - 1) {
            await wakeDevice();
            await delay(800);
        }
    }
    throw new AdbError(`uiautomator dump failed after retries (last: ${lastErr})`);
}
|
|
320
|
+
/**
 * Cut the `<hierarchy …>…</hierarchy>` document out of raw dump output,
 * dropping anything before the first opening tag and after the last closing
 * tag. Returns "" when either tag is missing.
 */
function extractHierarchyXml(out) {
    const closingTag = "</hierarchy>";
    const from = out.indexOf("<hierarchy");
    const to = out.lastIndexOf(closingTag);
    return from < 0 || to < 0 ? "" : out.slice(from, to + closingTag.length);
}
|
|
330
|
+
/**
 * Best-effort wake nudge used between uiautomator dump retries: sends the
 * `KEYCODE_WAKEUP` key event. Only WAKEUP is sent — no MENU / keyguard-dismiss
 * key event follows it. Any failure is swallowed on purpose.
 */
async function wakeDevice() {
    try {
        await pressKeyEvent("KEYCODE_WAKEUP");
    }
    catch {
        // best-effort wake; a failure here shouldn't abort the dump retry loop
    }
}
|
|
339
|
+
/** Promise-based sleep: resolves (with no value) after roughly `ms` milliseconds. */
function delay(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
|
342
|
+
/**
 * List every installed package on the device.
 *
 * Runs `pm list packages` and collects the name from each `package:<name>`
 * line; lines without that prefix are ignored.
 *
 * @returns a Set of package names, in the order pm printed them.
 */
export async function listPackages() {
    const prefix = "package:";
    const listing = await adbShell(["pm", "list", "packages"]);
    const names = listing
        .split("\n")
        .map((line) => line.trim())
        .filter((line) => line.startsWith(prefix))
        .map((line) => line.slice(prefix.length));
    return new Set(names);
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AndroidDevice — drives a local Android emulator/device via `adb`, implementing
|
|
3
|
+
* the SimulationDevice surface the loop expects.
|
|
4
|
+
*
|
|
5
|
+
* Two resolution paths, mirroring the browser:
|
|
6
|
+
* - ELEMENT (preferred): observe() dumps the uiautomator a11y tree, serializes
|
|
7
|
+
* it to the `[id] role "label"` string the backend DOMLocator reasons over,
|
|
8
|
+
* and keeps a local `shortId → bounds` map. The backend returns a `node_id`;
|
|
9
|
+
* executeAction() looks the bounds up locally and taps the row's CENTER.
|
|
10
|
+
* - VISION (fallback): when the dump fails or yields a sparse tree, observe()
|
|
11
|
+
* returns an empty tree so the backend takes its vision branch and returns
|
|
12
|
+
* NORMALIZED 0-1000 coordinates, which we de-normalize and tap (the original
|
|
13
|
+
* path). The vision path is also taken per-action whenever node_id is absent.
|
|
14
|
+
*
|
|
15
|
+
* Coordinate contract (see scripts/mobile-e2e + CROSS-REPO CONTRACT):
|
|
16
|
+
* adb `screencap` and `input tap` share ONE pixel space — NO DPR correction.
|
|
17
|
+
* - Element path: uiautomator bounds are screencap PIXELS, so the bounds-center
|
|
18
|
+
* is already a pixel center — tap and record it as-is.
|
|
19
|
+
* - Vision path: px = round(x / 1000 * screencapWidth); same for y.
|
|
20
|
+
*/
|
|
21
|
+
import type { LocalStepAction, ContextValue } from "./types.js";
|
|
22
|
+
import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
|
|
23
|
+
export interface AndroidDeviceOptions {
|
|
24
|
+
/** App package name to force-stop/relaunch between participants. May be derived from --app. */
|
|
25
|
+
appPackage?: string;
|
|
26
|
+
/** Local .apk path to install before the run, or a package name to launch. */
|
|
27
|
+
appPath?: string;
|
|
28
|
+
contextValues: ContextValue[];
|
|
29
|
+
log?: (msg: string) => void;
|
|
30
|
+
}
|
|
31
|
+
export declare class AndroidDevice implements SimulationDevice {
|
|
32
|
+
private readonly contextValues;
|
|
33
|
+
private readonly log;
|
|
34
|
+
private appPackage;
|
|
35
|
+
private readonly appPath;
|
|
36
|
+
/** screencap pixel size from the most recent capture — the de-normalization basis. */
|
|
37
|
+
private screenWidth;
|
|
38
|
+
private screenHeight;
|
|
39
|
+
/**
|
|
40
|
+
* shortId → bounds (screencap PIXELS) from the last observe(), the local
|
|
41
|
+
* counterpart of BrowserDevice.lastTreeData. executeAction() resolves a
|
|
42
|
+
* backend `node_id` against this and taps the bounds CENTER (element path).
|
|
43
|
+
*/
|
|
44
|
+
private lastNodeMap;
|
|
45
|
+
/** IME to restore on close (null if we never switched it). */
|
|
46
|
+
private previousIme;
|
|
47
|
+
private adbKeyboardActive;
|
|
48
|
+
constructor(opts: AndroidDeviceOptions);
|
|
49
|
+
launchOrReset(target: string): Promise<void>;
|
|
50
|
+
/**
|
|
51
|
+
* Resolve which package to drive, returning a non-null package name or
|
|
52
|
+
* throwing. For a local .apk we read the package straight from its binary
|
|
53
|
+
* AndroidManifest (no aapt) — works whether the apk is fresh or already
|
|
54
|
+
* installed. If that parse fails we fall back to diffing the installed-package
|
|
55
|
+
* list across install, then to a foreground read; an unresolvable case throws
|
|
56
|
+
* and asks for an explicit package.
|
|
57
|
+
*/
|
|
58
|
+
private resolvePackage;
|
|
59
|
+
private ensureAdbKeyboard;
|
|
60
|
+
private refreshDimensions;
|
|
61
|
+
observe(): Promise<DeviceObservation>;
|
|
62
|
+
/**
|
|
63
|
+
* Dump + serialize the uiautomator a11y tree. Any failure (dump retries
|
|
64
|
+
* exhausted, parse error) degrades to an empty tree so the backend falls back
|
|
65
|
+
* to the vision path — a missing tree must never abort the observation.
|
|
66
|
+
*/
|
|
67
|
+
private dumpTree;
|
|
68
|
+
captureScreenshot(): Promise<string>;
|
|
69
|
+
captureScreenshotJpeg(): Promise<Buffer>;
|
|
70
|
+
dimensions(): {
|
|
71
|
+
width: number;
|
|
72
|
+
height: number;
|
|
73
|
+
};
|
|
74
|
+
/** Normalized 0-1000 → screencap pixel space. NO DPR correction. */
|
|
75
|
+
private toPixels;
|
|
76
|
+
/**
|
|
77
|
+
* Resolve the pixel target for a positional action. The ELEMENT path wins when
|
|
78
|
+
* the backend returned a `node_id`: look the bounds up in the last observe()'s
|
|
79
|
+
* nodeMap and tap the row's CENTER (already in screencap pixels). Otherwise the
|
|
80
|
+
* VISION path de-normalizes the backend's 0-1000 coordinates. Returns:
|
|
81
|
+
* - {target} on success,
|
|
82
|
+
* - {stale:true} when a node_id has no bounds (the tree moved under us) — the
|
|
83
|
+
* caller fails the action so the loop forwards DOM_ELEMENT_NOT_FOUND and the
|
|
84
|
+
* agent re-observes/retries,
|
|
85
|
+
* - {target:null} when neither node_id nor coordinates were supplied.
|
|
86
|
+
*/
|
|
87
|
+
private resolveTarget;
|
|
88
|
+
executeAction(action: LocalStepAction): Promise<DeviceActionResult>;
|
|
89
|
+
private failNoCoords;
|
|
90
|
+
private failStaleNode;
|
|
91
|
+
private typeText;
|
|
92
|
+
private scroll;
|
|
93
|
+
private swipe;
|
|
94
|
+
/**
|
|
95
|
+
* Perform a drag: press the GRABBED element, move to the drop point, release.
|
|
96
|
+
* A drag is "click an element and let it go", so the press lands element-
|
|
97
|
+
* center (the resolved `grab` — node_id bounds center, or the vision
|
|
98
|
+
* coordinate when the tree is blind), NOT the backend's vision-estimated
|
|
99
|
+
* start. The release point is the drag END (drag.endX/endY). Both the grab
|
|
100
|
+
* fallback and the end de-normalize against screencap pixels. `inputDrag`
|
|
101
|
+
* (`input draganddrop`) dwells at the press point first so a long-press
|
|
102
|
+
* pickup registers — a slow swipe would read as a directional swipe instead.
|
|
103
|
+
* Returns the grab pixel point to record, or null if there's no end to drag
|
|
104
|
+
* toward.
|
|
105
|
+
*/
|
|
106
|
+
private drag;
|
|
107
|
+
private rotate;
|
|
108
|
+
private failUnsupported;
|
|
109
|
+
currentUrl(): string;
|
|
110
|
+
close(): Promise<void>;
|
|
111
|
+
}
|