@ishlabs/cli 0.25.0 → 0.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +42 -0
- package/dist/commands/doctor.js +359 -0
- package/dist/commands/iteration.js +23 -5
- package/dist/commands/study-participant.js +1 -1
- package/dist/commands/study-run.js +26 -1
- package/dist/commands/study-screenshots.js +38 -5
- package/dist/index.js +2 -0
- package/dist/lib/api-client.d.ts +3 -0
- package/dist/lib/api-client.js +6 -1
- package/dist/lib/docs.js +15 -3
- package/dist/lib/local-sim/actions.d.ts +18 -0
- package/dist/lib/local-sim/actions.js +32 -0
- package/dist/lib/local-sim/adb.d.ts +33 -0
- package/dist/lib/local-sim/adb.js +121 -17
- package/dist/lib/local-sim/android.d.ts +7 -1
- package/dist/lib/local-sim/android.js +21 -1
- package/dist/lib/local-sim/coordinates.d.ts +4 -4
- package/dist/lib/local-sim/coordinates.js +4 -4
- package/dist/lib/local-sim/device.d.ts +21 -2
- package/dist/lib/local-sim/device.js +1 -1
- package/dist/lib/local-sim/ios.d.ts +33 -10
- package/dist/lib/local-sim/ios.js +88 -20
- package/dist/lib/local-sim/loop.js +134 -25
- package/dist/lib/local-sim/native-a11y.d.ts +21 -7
- package/dist/lib/local-sim/native-a11y.js +82 -47
- package/dist/lib/local-sim/simctl.d.ts +28 -43
- package/dist/lib/local-sim/simctl.js +53 -142
- package/dist/lib/local-sim/types.d.ts +13 -2
- package/dist/lib/local-sim/xcuitest.d.ts +60 -0
- package/dist/lib/local-sim/xcuitest.js +303 -0
- package/dist/lib/paths.d.ts +14 -0
- package/dist/lib/paths.js +21 -0
- package/dist/lib/report-readiness.d.ts +44 -0
- package/dist/lib/report-readiness.js +74 -0
- package/dist/lib/skill-content.js +2 -0
- package/package.json +1 -1
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* IOSDevice — drives a local iOS simulator via `xcrun simctl` +
|
|
2
|
+
* IOSDevice — drives a local iOS simulator via `xcrun simctl` (lifecycle +
|
|
3
|
+
* screenshot) and WebDriverAgent/XCUITest (UI + a11y; see xcuitest.ts),
|
|
3
4
|
* implementing the SimulationDevice surface the loop expects. Mirrors
|
|
4
5
|
* AndroidDevice; the one substantive difference is the coordinate space.
|
|
5
6
|
*
|
|
6
7
|
* Two resolution paths, mirroring the browser:
|
|
7
|
-
* - ELEMENT (preferred): observe() reads
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
8
|
+
* - ELEMENT (preferred): observe() reads WDA's `/source` a11y tree, serializes
|
|
9
|
+
* it to the `[id] role "label"` string the backend DOMLocator reasons over,
|
|
10
|
+
* and keeps a local `shortId → bounds` map (bounds in POINTS). The backend
|
|
11
|
+
* returns a `node_id`; executeAction() looks the bounds up and taps the
|
|
12
|
+
* element's CENTER.
|
|
12
13
|
* - VISION (fallback): when the tree is empty/sparse, observe() returns an
|
|
13
14
|
* empty tree so the backend takes its vision branch and returns NORMALIZED
|
|
14
15
|
* 0-1000 coordinates. Also taken per-action whenever node_id is absent.
|
|
@@ -16,7 +17,7 @@
|
|
|
16
17
|
* COORDINATE SPACE — two spaces, the key difference from Android (where
|
|
17
18
|
* screencap and tap share one pixel space):
|
|
18
19
|
* `simctl io booted screenshot` is in PIXELS (e.g. 1179x2556 @3x), but
|
|
19
|
-
*
|
|
20
|
+
* WDA taps/swipes AND the `/source` a11y frames are POINTS (393x852).
|
|
20
21
|
* The invariant in BOTH paths: TAP in points, RECORD in pixels, because the
|
|
21
22
|
* loop re-normalizes the recorded coord against dimensions() (PIXELS).
|
|
22
23
|
* - VISION: tap pt = round(n/1000 * pointSize); record px = round(n/1000 * pixelSize).
|
|
@@ -30,10 +31,12 @@
|
|
|
30
31
|
* backend never converts coords with screen_width/height.
|
|
31
32
|
*/
|
|
32
33
|
import { resolveTextValue } from "./actions.js";
|
|
33
|
-
import { requireOneBootedSimulator,
|
|
34
|
+
import { requireOneBootedSimulator, screenshotPng, terminateApp, launchApp, installApp, isAppInstalled, bundleIdFromApp, appBuildFromSimulator, } from "./simctl.js";
|
|
35
|
+
// iOS UI interaction + a11y run through WebDriverAgent (XCUITest), not idb.
|
|
36
|
+
import { ensureWda, closeWda, describeScreen, describeAll, uiTap, uiLongPress, uiSwipe, uiText, uiKey, HID_KEY_RETURN, } from "./xcuitest.js";
|
|
34
37
|
import { isLocalPath } from "../upload.js";
|
|
35
38
|
import { deNormalizePoint, deNormalizeDrag, pointToPixel } from "./coordinates.js";
|
|
36
|
-
import {
|
|
39
|
+
import { parseXcuiHierarchy, serializeNativeTree, boundsCenter } from "./native-a11y.js";
|
|
37
40
|
// Let animations/transitions settle before the next observation so the
|
|
38
41
|
// screenshot the LLM reasons over reflects the action's result.
|
|
39
42
|
const POST_GESTURE_SETTLE_MS = 500;
|
|
@@ -61,6 +64,8 @@ export class IOSDevice {
|
|
|
61
64
|
appPath;
|
|
62
65
|
/** udid of the single booted simulator we drive. */
|
|
63
66
|
udid = "";
|
|
67
|
+
/** Set once the WebDriverAgent runner is up, so the startup note logs once. */
|
|
68
|
+
wdaStarted = false;
|
|
64
69
|
/** POINT size — what WDA taps/swipes consume (de-normalization basis for TAPS). */
|
|
65
70
|
pointWidth = 0;
|
|
66
71
|
pointHeight = 0;
|
|
@@ -96,6 +101,14 @@ export class IOSDevice {
|
|
|
96
101
|
this.bundleId = await this.resolveBundleId(target);
|
|
97
102
|
}
|
|
98
103
|
const bundleId = this.bundleId;
|
|
104
|
+
// Bring up the WebDriverAgent runner (install + simctl-launch the prebuilt
|
|
105
|
+
// xctrunner, open a session). Idempotent and reused across participants, so
|
|
106
|
+
// the ~30-60s first-launch cost is paid once per run.
|
|
107
|
+
if (!this.wdaStarted) {
|
|
108
|
+
this.log("Starting the iOS automation runner (WebDriverAgent); first launch can take ~30-60s...");
|
|
109
|
+
}
|
|
110
|
+
await ensureWda(this.udid);
|
|
111
|
+
this.wdaStarted = true;
|
|
99
112
|
// Prime screen geometry (points) before the first de-normalization.
|
|
100
113
|
await this.refreshScreen();
|
|
101
114
|
// Per-participant reset: terminate then relaunch from a clean state.
|
|
@@ -103,6 +116,21 @@ export class IOSDevice {
|
|
|
103
116
|
await launchApp(this.udid, bundleId);
|
|
104
117
|
await settle(1500); // cold start needs longer than a gesture settle
|
|
105
118
|
}
|
|
119
|
+
/**
|
|
120
|
+
* The installed app's version/build, read off the simulator after
|
|
121
|
+
* launchOrReset has resolved the bundle id. Best-effort — null until the
|
|
122
|
+
* bundle id is known, or if simctl/plutil can't report it.
|
|
123
|
+
*/
|
|
124
|
+
async appBuild() {
|
|
125
|
+
if (!this.bundleId || !this.udid)
|
|
126
|
+
return null;
|
|
127
|
+
const meta = await appBuildFromSimulator(this.udid, this.bundleId);
|
|
128
|
+
return {
|
|
129
|
+
package: this.bundleId,
|
|
130
|
+
version: meta?.version ?? null,
|
|
131
|
+
build: meta?.build ?? null,
|
|
132
|
+
};
|
|
133
|
+
}
|
|
106
134
|
/**
|
|
107
135
|
* Resolve the bundle id to drive, returning a non-null id or throwing.
|
|
108
136
|
* Installs a local `.app` first and reads its CFBundleIdentifier from
|
|
@@ -171,7 +199,7 @@ export class IOSDevice {
|
|
|
171
199
|
};
|
|
172
200
|
}
|
|
173
201
|
/**
|
|
174
|
-
* Read + serialize
|
|
202
|
+
* Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
|
|
175
203
|
* failure (retries exhausted on a trivial tree, parse error) degrades to an
|
|
176
204
|
* empty tree so the backend falls back to vision — a missing tree must never
|
|
177
205
|
* abort the observation.
|
|
@@ -179,7 +207,7 @@ export class IOSDevice {
|
|
|
179
207
|
async dumpTree() {
|
|
180
208
|
try {
|
|
181
209
|
const json = await describeAll(this.udid);
|
|
182
|
-
const nodes =
|
|
210
|
+
const nodes = parseXcuiHierarchy(json);
|
|
183
211
|
const tree = serializeNativeTree(nodes);
|
|
184
212
|
this.log(`a11y tree: ${tree.nodeMap.size} node(s)`);
|
|
185
213
|
return tree;
|
|
@@ -206,7 +234,7 @@ export class IOSDevice {
|
|
|
206
234
|
// separately in points (see toPoints()).
|
|
207
235
|
return { width: this.pixelWidth, height: this.pixelHeight };
|
|
208
236
|
}
|
|
209
|
-
/** Normalized 0-1000 → POINT space (
|
|
237
|
+
/** Normalized 0-1000 → POINT space (WDA taps/swipes take points). */
|
|
210
238
|
toPoints(c) {
|
|
211
239
|
return deNormalizePoint(c, this.pointWidth, this.pointHeight);
|
|
212
240
|
}
|
|
@@ -227,7 +255,7 @@ export class IOSDevice {
|
|
|
227
255
|
const bounds = this.lastNodeMap.get(action.node_id);
|
|
228
256
|
if (!bounds)
|
|
229
257
|
return { pt: null, px: null, stale: true };
|
|
230
|
-
const pt = boundsCenter(bounds); // POINTS —
|
|
258
|
+
const pt = boundsCenter(bounds); // POINTS — WDA taps directly
|
|
231
259
|
const px = pointToPixel(pt, this.pointWidth, this.pointHeight, this.pixelWidth, this.pixelHeight);
|
|
232
260
|
return { pt, px, stale: false };
|
|
233
261
|
}
|
|
@@ -237,7 +265,7 @@ export class IOSDevice {
|
|
|
237
265
|
}
|
|
238
266
|
async executeAction(action) {
|
|
239
267
|
try {
|
|
240
|
-
// pt drives the
|
|
268
|
+
// pt drives the WDA TAP (points); px is what we RECORD (pixels). ELEMENT
|
|
241
269
|
// path: pt = bounds-center, px = that center scaled to pixels. VISION
|
|
242
270
|
// path: both derive from the same normalized coord. Either way the tap
|
|
243
271
|
// lands right and the recorded px round-trips against dimensions().
|
|
@@ -279,6 +307,11 @@ export class IOSDevice {
|
|
|
279
307
|
await this.navigateBack();
|
|
280
308
|
break;
|
|
281
309
|
}
|
|
310
|
+
case "open_system_panel": {
|
|
311
|
+
// Element-less, like navigate_back: best-effort top-edge pull-down.
|
|
312
|
+
await this.openSystemPanel(action.panel === "quick_settings" ? "quick_settings" : "notifications");
|
|
313
|
+
break;
|
|
314
|
+
}
|
|
282
315
|
case "drag": {
|
|
283
316
|
// A drag GRABS an element and RELEASES it elsewhere ("click the
|
|
284
317
|
// element, move, let go") — distinct from a swipe (element-less
|
|
@@ -364,7 +397,7 @@ export class IOSDevice {
|
|
|
364
397
|
await settle(250);
|
|
365
398
|
}
|
|
366
399
|
const text = resolveTextValue(action, this.contextValues);
|
|
367
|
-
//
|
|
400
|
+
// WDA text input appends to the focused field; for click_type (replace) there
|
|
368
401
|
// is no "clear" helper on the WDA driver here, so we rely on the field being empty after focus. The
|
|
369
402
|
// vision agent typically taps an empty field, so this matches Android's
|
|
370
403
|
// common path; a true select-all clear isn't exposed by the driver helpers used here.
|
|
@@ -470,10 +503,10 @@ export class IOSDevice {
|
|
|
470
503
|
* do drive the system gesture) when no back button is visible.
|
|
471
504
|
*/
|
|
472
505
|
async navigateBack() {
|
|
473
|
-
const nodes =
|
|
506
|
+
const nodes = parseXcuiHierarchy(await describeAll(this.udid));
|
|
474
507
|
const back = this.findBackButton(nodes);
|
|
475
508
|
if (back) {
|
|
476
|
-
const c = boundsCenter(back.bounds); // POINTS —
|
|
509
|
+
const c = boundsCenter(back.bounds); // POINTS — WDA taps directly
|
|
477
510
|
await uiTap(this.udid, c.x, c.y);
|
|
478
511
|
return;
|
|
479
512
|
}
|
|
@@ -483,6 +516,39 @@ export class IOSDevice {
|
|
|
483
516
|
const midY = Math.round(this.pointHeight / 2);
|
|
484
517
|
await uiSwipe(this.udid, 1, midY, Math.round(this.pointWidth * 0.5), midY, 300);
|
|
485
518
|
}
|
|
519
|
+
/**
|
|
520
|
+
* Best-effort open of an iOS system panel by swiping down from the top edge.
|
|
521
|
+
* iOS has no `cmd statusbar` equivalent, so on a Face-ID layout:
|
|
522
|
+
* - notifications → Notification Center: swipe down from the top-CENTER.
|
|
523
|
+
* - quick_settings → Control Center: swipe down from the top-RIGHT corner.
|
|
524
|
+
* Coordinates are POINTS (WDA consumes points; see toPoints()/the swipe()
|
|
525
|
+
* helper). This is FLAKY on the simulator — the simulator's synthetic touch frequently
|
|
526
|
+
* doesn't trigger the system edge gesture (the same limitation navigateBack's
|
|
527
|
+
* edge-swipe hits). We compare a before/after screenshot and log LOUDLY when
|
|
528
|
+
* the screen didn't change, rather than silently reporting success, so a
|
|
529
|
+
* no-op is visible in the run. The executeAction caller still returns
|
|
530
|
+
* success:true (the gesture was attempted); the loud log is the signal.
|
|
531
|
+
*/
|
|
532
|
+
async openSystemPanel(panel) {
|
|
533
|
+
const before = await screenshotPng();
|
|
534
|
+
const w = this.pointWidth;
|
|
535
|
+
const h = this.pointHeight;
|
|
536
|
+
// Start ON the top edge and travel a third of the screen down. Control
|
|
537
|
+
// Center lives under the top-right (battery/status) corner on Face-ID
|
|
538
|
+
// devices; Notification Center under the top-center notch area.
|
|
539
|
+
const startX = panel === "quick_settings" ? Math.round(w * 0.92) : Math.round(w * 0.5);
|
|
540
|
+
const startY = 1;
|
|
541
|
+
const endY = Math.round(h * 0.35);
|
|
542
|
+
await uiSwipe(this.udid, startX, startY, startX, endY, 350);
|
|
543
|
+
await settle();
|
|
544
|
+
// Loudly surface a no-op: the simulator's synthetic touch often can't drive
|
|
545
|
+
// the system edge gesture. An identical screenshot means the panel didn't open.
|
|
546
|
+
const after = await screenshotPng();
|
|
547
|
+
if (before.equals(after)) {
|
|
548
|
+
this.log(`open_system_panel (${panel}): top-edge swipe produced no visible change — ` +
|
|
549
|
+
`the simulator's synthetic touch likely didn't trigger the system gesture (flaky on the simulator).`);
|
|
550
|
+
}
|
|
551
|
+
}
|
|
486
552
|
/**
|
|
487
553
|
* The nav-bar back button: the leading (leftmost) actionable button in the
|
|
488
554
|
* top nav-bar band. iOS HIG guarantees "back" is the leading nav item in a
|
|
@@ -526,7 +592,7 @@ export class IOSDevice {
|
|
|
526
592
|
// the agent can adapt.
|
|
527
593
|
const hint = {
|
|
528
594
|
pinch_zoom: "no multi-touch on the native driver; the agent should zoom via double_tap",
|
|
529
|
-
rotate_device: "
|
|
595
|
+
rotate_device: "rotation is not wired on the native driver; leave orientation as-is",
|
|
530
596
|
keyboard_shortcut: "no hardware keyboard on the native driver; use on-screen taps/text_input",
|
|
531
597
|
switch_tab: "tabs are a browser concept; the native app has a single window",
|
|
532
598
|
close_tab: "tabs are a browser concept; the native app has a single window",
|
|
@@ -540,7 +606,9 @@ export class IOSDevice {
|
|
|
540
606
|
return "";
|
|
541
607
|
}
|
|
542
608
|
async close() {
|
|
543
|
-
//
|
|
544
|
-
//
|
|
609
|
+
// Tear down the WebDriverAgent session (the runner is left installed on the
|
|
610
|
+
// shared simulator for the next run). The app resets via launchOrReset; no
|
|
611
|
+
// IME state to restore on iOS.
|
|
612
|
+
await closeWda(this.udid);
|
|
545
613
|
}
|
|
546
614
|
}
|
|
@@ -7,8 +7,9 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import { launchSharedBrowser, FULL_PAGE_HEIGHT_CAP_PX_MOBILE, FULL_PAGE_HEIGHT_CAP_PX_DESKTOP, } from "./browser.js";
|
|
9
9
|
import { uploadScreenshot } from "./upload.js";
|
|
10
|
-
import { detectNoVisibleChange, describeAction } from "./actions.js";
|
|
10
|
+
import { detectNoVisibleChange, describeAction, classifyStepKind } from "./actions.js";
|
|
11
11
|
import { createDevice } from "./device.js";
|
|
12
|
+
import pkg from "../../../package.json" with { type: "json" };
|
|
12
13
|
import { enableDebug, isDebugEnabled, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
|
|
13
14
|
/**
|
|
14
15
|
* Native (mobile) platforms drive a single physical device via screenshot →
|
|
@@ -49,6 +50,7 @@ export function flattenAction(raw, nodeId = null, nodeDescription = null) {
|
|
|
49
50
|
key: a.key ?? null,
|
|
50
51
|
tab_id: a.tab_id ?? null,
|
|
51
52
|
orientation: a.orientation ?? null,
|
|
53
|
+
panel: a.panel ?? null,
|
|
52
54
|
scale: a.scale ?? null,
|
|
53
55
|
// Native path: ResolvedAction.coordinates (top level of the resolved_actions
|
|
54
56
|
// entry) is the single {x,y} execution point. Fall back to the nested action
|
|
@@ -112,6 +114,34 @@ const SENTIMENT_ICONS = {
|
|
|
112
114
|
Positive: "+", Negative: "-", Neutral: "~",
|
|
113
115
|
Frustrated: "!", Confused: "?", Delighted: "*",
|
|
114
116
|
};
|
|
117
|
+
const CLI_VERSION = pkg.version;
|
|
118
|
+
/**
|
|
119
|
+
* Stamp the app build this run drove onto the iteration, so the web app's
|
|
120
|
+
* run-settings card can show which build the iteration is on. Best-effort:
|
|
121
|
+
* a native run never depends on this landing, so failures are warned, not
|
|
122
|
+
* thrown. Only native platforms carry a build.
|
|
123
|
+
*/
|
|
124
|
+
async function reportObservedApp(client, iterationId, platform, build, log) {
|
|
125
|
+
if (platform !== "ios" && platform !== "android")
|
|
126
|
+
return;
|
|
127
|
+
try {
|
|
128
|
+
await client.post(`/iterations/${iterationId}/observed-app`, {
|
|
129
|
+
platform,
|
|
130
|
+
package: build.package,
|
|
131
|
+
version: build.version,
|
|
132
|
+
build: build.build,
|
|
133
|
+
cli_version: CLI_VERSION,
|
|
134
|
+
});
|
|
135
|
+
const label = [build.version, build.build ? `(${build.build})` : null]
|
|
136
|
+
.filter(Boolean)
|
|
137
|
+
.join(" ");
|
|
138
|
+
log(`Recorded app build${label ? `: ${label}` : ""}`);
|
|
139
|
+
}
|
|
140
|
+
catch (err) {
|
|
141
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
142
|
+
console.warn(`Could not record app build for the iteration: ${msg}`);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
115
145
|
/**
|
|
116
146
|
* Run local simulations — parallel when multiple participants, sequential by default.
|
|
117
147
|
* Use --parallel <n> to control concurrency (default: number of participants).
|
|
@@ -138,6 +168,16 @@ export async function runLocalSimulations(client, opts) {
|
|
|
138
168
|
log("Native (android/ios) runs drive a single device — running sequentially.");
|
|
139
169
|
}
|
|
140
170
|
const concurrency = isNativeRun ? 1 : (opts.parallel ?? opts.participantIds.length);
|
|
171
|
+
// Native runs stamp the app build onto the iteration once — every
|
|
172
|
+
// participant in a run drives the same installed build, so dedupe to a
|
|
173
|
+
// single best-effort POST after the first device resolves its app.
|
|
174
|
+
let appBuildReported = false;
|
|
175
|
+
const reportAppBuild = (build, platform) => {
|
|
176
|
+
if (appBuildReported)
|
|
177
|
+
return;
|
|
178
|
+
appBuildReported = true;
|
|
179
|
+
void reportObservedApp(client, opts.iterationId, platform, build, log);
|
|
180
|
+
};
|
|
141
181
|
try {
|
|
142
182
|
if (concurrency <= 1 || opts.participantIds.length <= 1) {
|
|
143
183
|
// Sequential execution — each participant owns its own browser
|
|
@@ -148,7 +188,7 @@ export async function runLocalSimulations(client, opts) {
|
|
|
148
188
|
log(`\nStarting local simulation for ${participantName}...`);
|
|
149
189
|
try {
|
|
150
190
|
const participantLog = (msg) => log(`[${participantName}] ${msg}`);
|
|
151
|
-
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled);
|
|
191
|
+
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild);
|
|
152
192
|
log(`Completed: ${participantName}`);
|
|
153
193
|
}
|
|
154
194
|
catch (err) {
|
|
@@ -182,7 +222,7 @@ export async function runLocalSimulations(client, opts) {
|
|
|
182
222
|
const participantLog = (msg) => log(`[${participantName}] ${msg}`);
|
|
183
223
|
participantLog("Starting...");
|
|
184
224
|
try {
|
|
185
|
-
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, sharedBrowser);
|
|
225
|
+
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild, sharedBrowser);
|
|
186
226
|
participantLog("Completed");
|
|
187
227
|
}
|
|
188
228
|
catch (err) {
|
|
@@ -202,7 +242,7 @@ export async function runLocalSimulations(client, opts) {
|
|
|
202
242
|
process.off("SIGINT", onSigint);
|
|
203
243
|
}
|
|
204
244
|
}
|
|
205
|
-
async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, sharedBrowser) {
|
|
245
|
+
async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, onAppBuild, sharedBrowser) {
|
|
206
246
|
// Step 1: Initialize session
|
|
207
247
|
const initResponse = await client.localSimInit({
|
|
208
248
|
participant_id: participantId,
|
|
@@ -273,6 +313,19 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
273
313
|
try {
|
|
274
314
|
// Step 3: Launch / navigate the target to its starting point.
|
|
275
315
|
await device.launchOrReset(launchTarget);
|
|
316
|
+
// Step 3b: Capture the installed app's build (native only). Best-effort —
|
|
317
|
+
// the dedupe in runLocalSimulations keeps this to one POST per run, and a
|
|
318
|
+
// failed read or report never disturbs the simulation.
|
|
319
|
+
if (onAppBuild) {
|
|
320
|
+
try {
|
|
321
|
+
const observed = await device.appBuild?.();
|
|
322
|
+
if (observed)
|
|
323
|
+
onAppBuild(observed, platform);
|
|
324
|
+
}
|
|
325
|
+
catch {
|
|
326
|
+
// ignore — build capture is non-essential
|
|
327
|
+
}
|
|
328
|
+
}
|
|
276
329
|
// Step 4: Run assignment loop
|
|
277
330
|
for (let assignmentIdx = 0; assignmentIdx < session.assignments.length; assignmentIdx++) {
|
|
278
331
|
const assignment = session.assignments[assignmentIdx];
|
|
@@ -283,6 +336,12 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
283
336
|
// status when the loop ends because the agent terminated (completed vs
|
|
284
337
|
// abandoned). Stays "in_progress" if the loop hits max_steps.
|
|
285
338
|
let lastAssignmentStatus = "in_progress";
|
|
339
|
+
// Frame continuity (native): carry the PREVIOUS step's logical-screen
|
|
340
|
+
// classification + matched frame forward, so this step's match-frame call
|
|
341
|
+
// can tell the backend to reuse the frame when the screen didn't change
|
|
342
|
+
// (pure scroll / non-submitting keyboard). Reset per assignment.
|
|
343
|
+
let lastStepKind = "none";
|
|
344
|
+
let lastFrameVersionId;
|
|
286
345
|
while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
|
|
287
346
|
// OBSERVE — the device refreshes its own active surface (popup /
|
|
288
347
|
// switch_tab for browser) before capturing. (The browser device emits
|
|
@@ -394,12 +453,16 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
394
453
|
const actionDescs = [];
|
|
395
454
|
const elementNames = [];
|
|
396
455
|
const actionDebugEntries = [];
|
|
456
|
+
// Per-action success (index-aligned with stepResponse.actions), used to
|
|
457
|
+
// classify this step's logical-screen kind for frame continuity.
|
|
458
|
+
const perActionSuccess = [];
|
|
397
459
|
const preActionScreenshot = await device.captureScreenshot();
|
|
398
460
|
for (let i = 0; i < stepResponse.actions.length; i++) {
|
|
399
461
|
if (isCancelled())
|
|
400
462
|
break;
|
|
401
463
|
const action = stepResponse.actions[i];
|
|
402
464
|
const result = await device.executeAction(action);
|
|
465
|
+
perActionSuccess[i] = result.success;
|
|
403
466
|
const desc = describeAction(action);
|
|
404
467
|
debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
|
|
405
468
|
const openedNewTab = result.openedNewTab;
|
|
@@ -414,28 +477,44 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
414
477
|
const actionType = action.type || "unknown";
|
|
415
478
|
const INTERNAL_ACTIONS = new Set(["think"]);
|
|
416
479
|
if (!INTERNAL_ACTIONS.has(actionType)) {
|
|
480
|
+
// Pack `data` to match the hosted sim's map_action_to_db so native
|
|
481
|
+
// rows render identically. value_type lets the FE flag var/secret;
|
|
482
|
+
// drag's full path goes under data.coordinates (0-1000), not a
|
|
483
|
+
// bespoke drag_end. Secret `value` stays masked (it's the variable
|
|
484
|
+
// key, not the resolved secret — masking is strictly safer than the
|
|
485
|
+
// web path, and value_type now drives the FE lock glyph).
|
|
486
|
+
const actionData = {
|
|
487
|
+
...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
|
|
488
|
+
...(action.value_type && { value_type: action.value_type }),
|
|
489
|
+
...(action.mode && { mode: action.mode }),
|
|
490
|
+
...(action.submit && { submit: action.submit }),
|
|
491
|
+
...(action.direction && { direction: action.direction }),
|
|
492
|
+
...(action.amount && { amount: action.amount }),
|
|
493
|
+
...(action.count && action.count > 1 && { count: action.count }),
|
|
494
|
+
...(action.duration_ms && { duration_ms: action.duration_ms }),
|
|
495
|
+
...(action.modifiers?.length && { modifiers: action.modifiers }),
|
|
496
|
+
...(action.key && { key: action.key }),
|
|
497
|
+
...(action.tab_id && { tab_id: action.tab_id }),
|
|
498
|
+
...(action.orientation && { orientation: action.orientation }),
|
|
499
|
+
...(action.panel && { panel: action.panel }),
|
|
500
|
+
...(action.drag && {
|
|
501
|
+
coordinates: {
|
|
502
|
+
startX: action.drag.startX,
|
|
503
|
+
startY: action.drag.startY,
|
|
504
|
+
endX: action.drag.endX,
|
|
505
|
+
endY: action.drag.endY,
|
|
506
|
+
},
|
|
507
|
+
}),
|
|
508
|
+
...(openedNewTab && { opened_new_tab: true }),
|
|
509
|
+
};
|
|
417
510
|
actionDatas.push({
|
|
418
511
|
action_type: actionType,
|
|
419
512
|
element_label: action.element_name ?? null,
|
|
420
513
|
element_type: action.element_type ?? null,
|
|
421
|
-
coordinates
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
...(action.submit && { submit: action.submit }),
|
|
426
|
-
...(action.direction && { direction: action.direction }),
|
|
427
|
-
...(action.amount && { amount: action.amount }),
|
|
428
|
-
...(action.count && action.count > 1 && { count: action.count }),
|
|
429
|
-
...(action.duration_ms && { duration_ms: action.duration_ms }),
|
|
430
|
-
...(action.modifiers?.length && { modifiers: action.modifiers }),
|
|
431
|
-
...(action.key && { key: action.key }),
|
|
432
|
-
...(action.tab_id && { tab_id: action.tab_id }),
|
|
433
|
-
...(action.orientation && { orientation: action.orientation }),
|
|
434
|
-
// The recorded `coordinates` is the drag START; persist the END
|
|
435
|
-
// (normalized 0-1000) too so the journey captures the full path.
|
|
436
|
-
...(action.drag && { drag_end: { x: action.drag.endX, y: action.drag.endY } }),
|
|
437
|
-
...(openedNewTab && { opened_new_tab: true }),
|
|
438
|
-
},
|
|
514
|
+
// Drag's path lives in data.coordinates; the hosted sim leaves the
|
|
515
|
+
// top-level coordinates null for a drag.
|
|
516
|
+
coordinates: action.drag ? null : normalizedCoords,
|
|
517
|
+
data: Object.keys(actionData).length ? actionData : null,
|
|
439
518
|
order: i,
|
|
440
519
|
});
|
|
441
520
|
}
|
|
@@ -492,6 +571,15 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
492
571
|
// Native: drive FrameSourceType.ANDROID/IOS directly; browser falls
|
|
493
572
|
// back to screen_format server-side.
|
|
494
573
|
platform,
|
|
574
|
+
// Frame continuity: these describe the transition INTO this
|
|
575
|
+
// observation, produced by the PREVIOUS step's action. When that
|
|
576
|
+
// step was a pure scroll / non-submitting keyboard on a native
|
|
577
|
+
// device, the logical screen didn't change — tell the backend to
|
|
578
|
+
// reuse the previous frame instead of minting a new one off the
|
|
579
|
+
// shifted pixels. Carried from lastStepKind / lastFrameVersionId,
|
|
580
|
+
// updated AFTER this call for the next iteration.
|
|
581
|
+
...(isNative && lastFrameVersionId ? { previous_frame_version_id: lastFrameVersionId } : {}),
|
|
582
|
+
same_screen_continuation: isNative && (lastStepKind === "scroll" || lastStepKind === "keyboard"),
|
|
495
583
|
});
|
|
496
584
|
frameVersionId = matchResult.frame_version_id;
|
|
497
585
|
}
|
|
@@ -499,6 +587,11 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
499
587
|
const msg = err instanceof Error ? err.message : String(err);
|
|
500
588
|
log(` Warning: frame matching failed — ${msg}`);
|
|
501
589
|
}
|
|
590
|
+
// Carry THIS step's logical-screen classification + matched frame
|
|
591
|
+
// forward for the NEXT iteration's match-frame call (consumed above as
|
|
592
|
+
// last*). Classify after the call so ordering is consume-then-update.
|
|
593
|
+
lastStepKind = classifyStepKind(stepResponse.actions, perActionSuccess);
|
|
594
|
+
lastFrameVersionId = frameVersionId;
|
|
502
595
|
// Debug-only: capture post-action screenshot to show result
|
|
503
596
|
let postActionBase64;
|
|
504
597
|
if (isDebugEnabled()) {
|
|
@@ -518,7 +611,7 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
518
611
|
forwards.push({ type: "LOOP_DETECTED", content: "A repetitive action cycle was detected. Try a different approach." });
|
|
519
612
|
}
|
|
520
613
|
// Record interaction (1-indexed step for backend)
|
|
521
|
-
|
|
614
|
+
const interaction = {
|
|
522
615
|
step: step + 1,
|
|
523
616
|
assignment_id: assignment.id,
|
|
524
617
|
...(screenshotUrl ? { screenshot_url: screenshotUrl } : { screenshot_base64: obsBase64 }),
|
|
@@ -542,7 +635,24 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
542
635
|
// Server reduces this to Interaction.tab when N >= 2; omit on
|
|
543
636
|
// single-tab steps to keep the payload (and DB column) null.
|
|
544
637
|
...(tabsSnapshot.length >= 2 ? { tabs: tabsSnapshot } : {}),
|
|
545
|
-
}
|
|
638
|
+
};
|
|
639
|
+
// Keep the in-memory array for the debug HTML report.
|
|
640
|
+
interactions.push(interaction);
|
|
641
|
+
// Stream this interaction live so the backend persists + commits it
|
|
642
|
+
// immediately and fires INTERACTION_CREATED in realtime. A streaming
|
|
643
|
+
// failure must never abort the run — log and continue (the run-end
|
|
644
|
+
// finalize call still records the terminal state).
|
|
645
|
+
try {
|
|
646
|
+
await client.localSimRecordInteraction({
|
|
647
|
+
participant_id: session.participant_id,
|
|
648
|
+
product_id: session.product_id,
|
|
649
|
+
interaction,
|
|
650
|
+
});
|
|
651
|
+
}
|
|
652
|
+
catch (err) {
|
|
653
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
654
|
+
log(` Warning: failed to stream interaction ${step + 1} — ${msg}`);
|
|
655
|
+
}
|
|
546
656
|
// Update history for next step
|
|
547
657
|
history.push({
|
|
548
658
|
comment: stepResponse.comment,
|
|
@@ -633,7 +743,6 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
633
743
|
await client.localSimRecord({
|
|
634
744
|
participant_id: session.participant_id,
|
|
635
745
|
product_id: session.product_id,
|
|
636
|
-
interactions,
|
|
637
746
|
final_status: finalStatus,
|
|
638
747
|
assignment_statuses: assignmentStatuses,
|
|
639
748
|
});
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*
|
|
13
13
|
* COORDINATE SPACE — carried, not converted, by this module:
|
|
14
14
|
* - Android `uiautomator dump` bounds are screencap PIXELS (`space: "px"`).
|
|
15
|
-
* - iOS
|
|
15
|
+
* - iOS WebDriverAgent /source frames are POINTS (`space: "points"`).
|
|
16
16
|
* The device de-normalizes/taps in its own space (AndroidDevice taps pixels;
|
|
17
17
|
* IOSDevice taps points), so the `space` tag tells the caller which dimension a
|
|
18
18
|
* node's bounds-center belongs to. This module never mixes the two.
|
|
@@ -65,15 +65,29 @@ export interface NativeTree {
|
|
|
65
65
|
*/
|
|
66
66
|
export declare function parseUiautomatorXml(xml: string): NativeNode[];
|
|
67
67
|
/**
|
|
68
|
-
* Parse `
|
|
69
|
-
*
|
|
70
|
-
*
|
|
71
|
-
*
|
|
68
|
+
* Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
|
|
69
|
+
* FLAT, depth-first `NativeNode[]` (POINTS) that the previous idb-based parser produced,
|
|
70
|
+
* so `serializeNativeTree` consumes it unchanged. WDA's `type` matches idb's iOS
|
|
71
|
+
* types (Button/StaticText/SearchField/Cell/Image/Application…), so
|
|
72
|
+
* `normalizeRole`/`IOS_ACTIONABLE_TYPES`/`frameToBounds` all apply as-is.
|
|
73
|
+
*
|
|
74
|
+
* KEY: WDA's `/source` is the FULL XCUIElement tree — every container and leaf —
|
|
75
|
+
* NOT idb's clean accessibility-elements list. iOS settings rows surface as an
|
|
76
|
+
* accessible `Button` ("General", isAccessible=1) that ALSO contains a duplicate
|
|
77
|
+
* inner `StaticText` ("General", isAccessible=0) and is wrapped in a `Cell`
|
|
78
|
+
* (isAccessible=0). Emitting all three yields "General General" + empty
|
|
79
|
+
* listitems. So we emit ONLY `isAccessible && isVisible` nodes — exactly the
|
|
80
|
+
* VoiceOver-exposed set idb returned: the labeled Button is both the label and
|
|
81
|
+
* the tap target; the duplicate StaticText and the wrapping Cell are pruned. A
|
|
82
|
+
* sparse a11y tree degrades to the loop's vision fallback, so strict filtering
|
|
83
|
+
* never strands the run.
|
|
84
|
+
*
|
|
85
|
+
* Accepts either the raw tree or the W3C `{ value: <tree> }` envelope WDA returns.
|
|
72
86
|
*/
|
|
73
|
-
export declare function
|
|
87
|
+
export declare function parseXcuiHierarchy(json: string): NativeNode[];
|
|
74
88
|
/**
|
|
75
89
|
* Serialize a flat NativeNode list (from `parseUiautomatorXml` /
|
|
76
|
-
* `
|
|
90
|
+
* `parseXcuiHierarchy`) into the `[id] role "label"` string the DOMLocator
|
|
77
91
|
* reasons over, plus a `shortId → bounds` map for local tap resolution.
|
|
78
92
|
*
|
|
79
93
|
* Emission rules (kept tight, like the DOM serializer):
|