@ishlabs/cli 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +16 -0
- package/dist/commands/doctor.js +34 -9
- package/dist/commands/iteration.js +23 -5
- package/dist/commands/study-participant.js +1 -1
- package/dist/commands/study-run.js +26 -1
- package/dist/commands/study-screenshots.js +38 -5
- package/dist/lib/api-client.d.ts +4 -0
- package/dist/lib/api-client.js +6 -1
- package/dist/lib/docs.js +15 -3
- package/dist/lib/local-sim/actions.d.ts +18 -0
- package/dist/lib/local-sim/actions.js +30 -0
- package/dist/lib/local-sim/adb.d.ts +39 -0
- package/dist/lib/local-sim/adb.js +152 -17
- package/dist/lib/local-sim/android.d.ts +12 -4
- package/dist/lib/local-sim/android.js +44 -11
- package/dist/lib/local-sim/device.d.ts +44 -0
- package/dist/lib/local-sim/ios.d.ts +12 -5
- package/dist/lib/local-sim/ios.js +45 -11
- package/dist/lib/local-sim/loop.js +220 -26
- package/dist/lib/local-sim/native-a11y.d.ts +24 -0
- package/dist/lib/local-sim/native-a11y.js +76 -14
- package/dist/lib/local-sim/screen-signature.d.ts +77 -0
- package/dist/lib/local-sim/screen-signature.js +166 -0
- package/dist/lib/local-sim/simctl.d.ts +15 -0
- package/dist/lib/local-sim/simctl.js +41 -1
- package/dist/lib/local-sim/types.d.ts +11 -2
- package/dist/lib/local-sim/xcuitest.d.ts +7 -0
- package/dist/lib/local-sim/xcuitest.js +16 -0
- package/dist/lib/modality.js +7 -2
- package/dist/lib/paths.d.ts +6 -0
- package/dist/lib/paths.js +9 -0
- package/dist/lib/report-readiness.d.ts +44 -0
- package/dist/lib/report-readiness.js +74 -0
- package/dist/lib/skill-content.js +2 -0
- package/package.json +1 -1
|
@@ -5,10 +5,12 @@
|
|
|
5
5
|
* against a SimulationDevice (a Playwright browser today; a native Android
|
|
6
6
|
* emulator next). The loop is device-agnostic — see device.ts.
|
|
7
7
|
*/
|
|
8
|
+
import { appendFileSync } from "node:fs";
|
|
8
9
|
import { launchSharedBrowser, FULL_PAGE_HEIGHT_CAP_PX_MOBILE, FULL_PAGE_HEIGHT_CAP_PX_DESKTOP, } from "./browser.js";
|
|
9
10
|
import { uploadScreenshot } from "./upload.js";
|
|
10
|
-
import { detectNoVisibleChange, describeAction } from "./actions.js";
|
|
11
|
+
import { detectNoVisibleChange, describeAction, classifyStepKind } from "./actions.js";
|
|
11
12
|
import { createDevice } from "./device.js";
|
|
13
|
+
import pkg from "../../../package.json" with { type: "json" };
|
|
12
14
|
import { enableDebug, isDebugEnabled, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
|
|
13
15
|
/**
|
|
14
16
|
* Native (mobile) platforms drive a single physical device via screenshot →
|
|
@@ -18,6 +20,58 @@ import { enableDebug, isDebugEnabled, debugRawResponse, debugNormalizedActions,
|
|
|
18
20
|
function isNativePlatform(platform) {
    // Exactly two platform strings count as native; any other value
    // (including undefined) is reported as non-native.
    switch (platform) {
        case "android":
        case "ios":
            return true;
        default:
            return false;
    }
}
|
|
23
|
+
/**
 * Build ONE corpus-dump JSON line capturing everything needed to replay any
 * screen-signature algorithm offline against this observation. Pure (input →
 * string); the caller owns the I/O and the env gating.
 *
 * Shape of the emitted record (keys in this order):
 *   ts, app, platform, location, coarse{platform,package,activity,bundleId},
 *   nodes[{role,label,resourceId,scrollable,insideScrollable}],
 *   signature{value,usable,tokenCount}|null, frame_version_id, action_kind
 *
 * `app` is the coarse package (android) / bundle id (ios), or "" when absent.
 * Each node is projected down to the exact fields `computeScreenSignature`
 * reads, so the line is a faithful replay basis.
 *
 * Optional inputs are normalized to `null` rather than left `undefined`:
 * JSON.stringify silently drops `undefined`-valued keys, which would make the
 * JSONL schema vary from line to line. `location` is normalized the same way
 * as `frame_version_id` / `coarse.*` so every line carries the same top-level
 * key set.
 *
 * @param input Observation data: `ts`, `location`, `coarse`, `nodes`,
 *   optional `signature`, optional `frameVersionId`, and `actionKind`.
 * @returns The serialized record followed by a single "\n".
 */
function buildCorpusDumpLine(input) {
    const { coarse } = input;
    const isAndroid = coarse.platform === "android";
    // Project each node to the replay-relevant fields only (bounds, clickable,
    // etc. are intentionally not dumped).
    const projectNode = (n) => ({
        role: n.role,
        label: n.label,
        resourceId: n.resourceId ?? null,
        scrollable: n.scrollable,
        insideScrollable: n.insideScrollable,
    });
    const signature = input.signature
        ? {
            value: input.signature.value,
            usable: input.signature.usable,
            tokenCount: input.signature.tokenCount,
        }
        : null;
    const record = {
        ts: input.ts,
        app: (isAndroid ? coarse.package : coarse.bundleId) ?? "",
        platform: coarse.platform,
        // Keep the key present even when no location label was supplied.
        location: input.location ?? null,
        coarse: {
            platform: coarse.platform,
            package: coarse.package ?? null,
            activity: coarse.activity ?? null,
            bundleId: coarse.bundleId ?? null,
        },
        nodes: input.nodes.map(projectNode),
        signature,
        frame_version_id: input.frameVersionId ?? null,
        action_kind: input.actionKind,
    };
    return JSON.stringify(record) + "\n";
}
|
|
61
|
+
/**
 * Serialize `input` as one corpus-dump line and append it to the file at
 * `path`. Best-effort by design: any failure while building or writing the
 * line (bad path, full disk, ...) is caught and reported through `log`, so the
 * instrumentation can NEVER abort a live sim. The caller is responsible for
 * gating on ISH_DUMP_CORPUS + native source before calling.
 *
 * @param path  Destination file; appended to.
 * @param input Observation data handed to `buildCorpusDumpLine`.
 * @param log   Sink for the warning emitted when the append fails.
 */
function appendCorpusDumpLine(path, input, log) {
    try {
        const line = buildCorpusDumpLine(input);
        appendFileSync(path, line);
    }
    catch (failure) {
        // Non-Error throwables are stringified so the warning is always readable.
        const reason = failure instanceof Error ? failure.message : String(failure);
        log(` Warning: corpus dump append failed — ${reason}`);
    }
}
|
|
21
75
|
/**
|
|
22
76
|
* Convert a raw action (from either resolved_actions or output.action.actions)
|
|
23
77
|
* into the flat LocalStepAction shape used by the executor. Exported for unit
|
|
@@ -113,6 +167,34 @@ const SENTIMENT_ICONS = {
|
|
|
113
167
|
Positive: "+", Negative: "-", Neutral: "~",
|
|
114
168
|
Frustrated: "!", Confused: "?", Delighted: "*",
|
|
115
169
|
};
|
|
170
|
+
const CLI_VERSION = pkg.version;
|
|
171
|
+
/**
 * Record on the iteration which app build this run drove, so the web app's
 * run-settings card can show the build the iteration is on.
 *
 * Only "ios" and "android" carry a build; for any other platform this is a
 * no-op. The POST is best-effort: a native run never depends on it landing,
 * so a failure is reported with console.warn and never rethrown.
 *
 * @param client      API client exposing `post(path, body)`.
 * @param iterationId Iteration the build is stamped onto.
 * @param platform    Platform string of the run.
 * @param build       Observed build; `package`, `version` and `build` are sent.
 * @param log         Sink for the success message.
 */
async function reportObservedApp(client, iterationId, platform, build, log) {
    const carriesBuild = platform === "ios" || platform === "android";
    if (!carriesBuild) {
        return;
    }
    try {
        const endpoint = `/iterations/${iterationId}/observed-app`;
        const payload = {
            platform,
            package: build.package,
            version: build.version,
            build: build.build,
            cli_version: CLI_VERSION,
        };
        await client.post(endpoint, payload);
        // Human-readable "<version> (<build>)", skipping whichever part is missing.
        const parts = [];
        if (build.version) {
            parts.push(build.version);
        }
        if (build.build) {
            parts.push(`(${build.build})`);
        }
        const label = parts.join(" ");
        log(`Recorded app build${label ? `: ${label}` : ""}`);
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        console.warn(`Could not record app build for the iteration: ${reason}`);
    }
}
|
|
116
198
|
/**
|
|
117
199
|
* Run local simulations — parallel when multiple participants, sequential by default.
|
|
118
200
|
* Use --parallel <n> to control concurrency (default: number of participants).
|
|
@@ -139,6 +221,16 @@ export async function runLocalSimulations(client, opts) {
|
|
|
139
221
|
log("Native (android/ios) runs drive a single device — running sequentially.");
|
|
140
222
|
}
|
|
141
223
|
const concurrency = isNativeRun ? 1 : (opts.parallel ?? opts.participantIds.length);
|
|
224
|
+
// Native runs stamp the app build onto the iteration once — every
|
|
225
|
+
// participant in a run drives the same installed build, so dedupe to a
|
|
226
|
+
// single best-effort POST after the first device resolves its app.
|
|
227
|
+
let appBuildReported = false;
|
|
228
|
+
const reportAppBuild = (build, platform) => {
|
|
229
|
+
if (appBuildReported)
|
|
230
|
+
return;
|
|
231
|
+
appBuildReported = true;
|
|
232
|
+
void reportObservedApp(client, opts.iterationId, platform, build, log);
|
|
233
|
+
};
|
|
142
234
|
try {
|
|
143
235
|
if (concurrency <= 1 || opts.participantIds.length <= 1) {
|
|
144
236
|
// Sequential execution — each participant owns its own browser
|
|
@@ -149,7 +241,7 @@ export async function runLocalSimulations(client, opts) {
|
|
|
149
241
|
log(`\nStarting local simulation for ${participantName}...`);
|
|
150
242
|
try {
|
|
151
243
|
const participantLog = (msg) => log(`[${participantName}] ${msg}`);
|
|
152
|
-
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled);
|
|
244
|
+
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild);
|
|
153
245
|
log(`Completed: ${participantName}`);
|
|
154
246
|
}
|
|
155
247
|
catch (err) {
|
|
@@ -183,7 +275,7 @@ export async function runLocalSimulations(client, opts) {
|
|
|
183
275
|
const participantLog = (msg) => log(`[${participantName}] ${msg}`);
|
|
184
276
|
participantLog("Starting...");
|
|
185
277
|
try {
|
|
186
|
-
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, sharedBrowser);
|
|
278
|
+
await runSingleSimulation(client, participantId, participantName, opts, participantLog, () => cancelled, reportAppBuild, sharedBrowser);
|
|
187
279
|
participantLog("Completed");
|
|
188
280
|
}
|
|
189
281
|
catch (err) {
|
|
@@ -203,7 +295,7 @@ export async function runLocalSimulations(client, opts) {
|
|
|
203
295
|
process.off("SIGINT", onSigint);
|
|
204
296
|
}
|
|
205
297
|
}
|
|
206
|
-
async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, sharedBrowser) {
|
|
298
|
+
async function runSingleSimulation(client, participantId, participantName, opts, log, isCancelled, onAppBuild, sharedBrowser) {
|
|
207
299
|
// Step 1: Initialize session
|
|
208
300
|
const initResponse = await client.localSimInit({
|
|
209
301
|
participant_id: participantId,
|
|
@@ -274,6 +366,19 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
274
366
|
try {
|
|
275
367
|
// Step 3: Launch / navigate the target to its starting point.
|
|
276
368
|
await device.launchOrReset(launchTarget);
|
|
369
|
+
// Step 3b: Capture the installed app's build (native only). Best-effort —
|
|
370
|
+
// the dedupe in runLocalSimulations keeps this to one POST per run, and a
|
|
371
|
+
// failed read or report never disturbs the simulation.
|
|
372
|
+
if (onAppBuild) {
|
|
373
|
+
try {
|
|
374
|
+
const observed = await device.appBuild?.();
|
|
375
|
+
if (observed)
|
|
376
|
+
onAppBuild(observed, platform);
|
|
377
|
+
}
|
|
378
|
+
catch {
|
|
379
|
+
// ignore — build capture is non-essential
|
|
380
|
+
}
|
|
381
|
+
}
|
|
277
382
|
// Step 4: Run assignment loop
|
|
278
383
|
for (let assignmentIdx = 0; assignmentIdx < session.assignments.length; assignmentIdx++) {
|
|
279
384
|
const assignment = session.assignments[assignmentIdx];
|
|
@@ -284,6 +389,12 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
284
389
|
// status when the loop ends because the agent terminated (completed vs
|
|
285
390
|
// abandoned). Stays "in_progress" if the loop hits max_steps.
|
|
286
391
|
let lastAssignmentStatus = "in_progress";
|
|
392
|
+
// Frame continuity (native): carry the PREVIOUS step's logical-screen
|
|
393
|
+
// classification + matched frame forward, so this step's match-frame call
|
|
394
|
+
// can tell the backend to reuse the frame when the screen didn't change
|
|
395
|
+
// (pure scroll / non-submitting keyboard). Reset per assignment.
|
|
396
|
+
let lastStepKind = "none";
|
|
397
|
+
let lastFrameVersionId;
|
|
287
398
|
while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
|
|
288
399
|
// OBSERVE — the device refreshes its own active surface (popup /
|
|
289
400
|
// switch_tab for browser) before capturing. (The browser device emits
|
|
@@ -291,6 +402,11 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
291
402
|
// TODO(perf): backend can downscale before the vision LLM; full-res sent for now.
|
|
292
403
|
const obs = await device.observe();
|
|
293
404
|
const currentScreenshot = obs.screenshot;
|
|
405
|
+
// Corpus dump (ISH_DUMP_CORPUS): the action_kind of the step that LED to
|
|
406
|
+
// THIS observation is the inbound lastStepKind (carried from the prior
|
|
407
|
+
// step; reassigned below AFTER the match-frame call). At step 0 nothing
|
|
408
|
+
// preceded this screen, so report it as "initial".
|
|
409
|
+
const inboundActionKind = step === 0 ? "initial" : lastStepKind;
|
|
294
410
|
// Capture JPEG of observation for upload and recording (pre-action)
|
|
295
411
|
const obsJpeg = await device.captureScreenshotJpeg();
|
|
296
412
|
const obsBase64 = obsJpeg.toString("base64");
|
|
@@ -395,12 +511,16 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
395
511
|
const actionDescs = [];
|
|
396
512
|
const elementNames = [];
|
|
397
513
|
const actionDebugEntries = [];
|
|
514
|
+
// Per-action success (index-aligned with stepResponse.actions), used to
|
|
515
|
+
// classify this step's logical-screen kind for frame continuity.
|
|
516
|
+
const perActionSuccess = [];
|
|
398
517
|
const preActionScreenshot = await device.captureScreenshot();
|
|
399
518
|
for (let i = 0; i < stepResponse.actions.length; i++) {
|
|
400
519
|
if (isCancelled())
|
|
401
520
|
break;
|
|
402
521
|
const action = stepResponse.actions[i];
|
|
403
522
|
const result = await device.executeAction(action);
|
|
523
|
+
perActionSuccess[i] = result.success;
|
|
404
524
|
const desc = describeAction(action);
|
|
405
525
|
debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
|
|
406
526
|
const openedNewTab = result.openedNewTab;
|
|
@@ -415,29 +535,44 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
415
535
|
const actionType = action.type || "unknown";
|
|
416
536
|
const INTERNAL_ACTIONS = new Set(["think"]);
|
|
417
537
|
if (!INTERNAL_ACTIONS.has(actionType)) {
|
|
538
|
+
// Pack `data` to match the hosted sim's map_action_to_db so native
|
|
539
|
+
// rows render identically. value_type lets the FE flag var/secret;
|
|
540
|
+
// drag's full path goes under data.coordinates (0-1000), not a
|
|
541
|
+
// bespoke drag_end. Secret `value` stays masked (it's the variable
|
|
542
|
+
// key, not the resolved secret — masking is strictly safer than the
|
|
543
|
+
// web path, and value_type now drives the FE lock glyph).
|
|
544
|
+
const actionData = {
|
|
545
|
+
...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
|
|
546
|
+
...(action.value_type && { value_type: action.value_type }),
|
|
547
|
+
...(action.mode && { mode: action.mode }),
|
|
548
|
+
...(action.submit && { submit: action.submit }),
|
|
549
|
+
...(action.direction && { direction: action.direction }),
|
|
550
|
+
...(action.amount && { amount: action.amount }),
|
|
551
|
+
...(action.count && action.count > 1 && { count: action.count }),
|
|
552
|
+
...(action.duration_ms && { duration_ms: action.duration_ms }),
|
|
553
|
+
...(action.modifiers?.length && { modifiers: action.modifiers }),
|
|
554
|
+
...(action.key && { key: action.key }),
|
|
555
|
+
...(action.tab_id && { tab_id: action.tab_id }),
|
|
556
|
+
...(action.orientation && { orientation: action.orientation }),
|
|
557
|
+
...(action.panel && { panel: action.panel }),
|
|
558
|
+
...(action.drag && {
|
|
559
|
+
coordinates: {
|
|
560
|
+
startX: action.drag.startX,
|
|
561
|
+
startY: action.drag.startY,
|
|
562
|
+
endX: action.drag.endX,
|
|
563
|
+
endY: action.drag.endY,
|
|
564
|
+
},
|
|
565
|
+
}),
|
|
566
|
+
...(openedNewTab && { opened_new_tab: true }),
|
|
567
|
+
};
|
|
418
568
|
actionDatas.push({
|
|
419
569
|
action_type: actionType,
|
|
420
570
|
element_label: action.element_name ?? null,
|
|
421
571
|
element_type: action.element_type ?? null,
|
|
422
|
-
coordinates
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
...(action.submit && { submit: action.submit }),
|
|
427
|
-
...(action.direction && { direction: action.direction }),
|
|
428
|
-
...(action.amount && { amount: action.amount }),
|
|
429
|
-
...(action.count && action.count > 1 && { count: action.count }),
|
|
430
|
-
...(action.duration_ms && { duration_ms: action.duration_ms }),
|
|
431
|
-
...(action.modifiers?.length && { modifiers: action.modifiers }),
|
|
432
|
-
...(action.key && { key: action.key }),
|
|
433
|
-
...(action.tab_id && { tab_id: action.tab_id }),
|
|
434
|
-
...(action.orientation && { orientation: action.orientation }),
|
|
435
|
-
...(action.panel && { panel: action.panel }),
|
|
436
|
-
// The recorded `coordinates` is the drag START; persist the END
|
|
437
|
-
// (normalized 0-1000) too so the journey captures the full path.
|
|
438
|
-
...(action.drag && { drag_end: { x: action.drag.endX, y: action.drag.endY } }),
|
|
439
|
-
...(openedNewTab && { opened_new_tab: true }),
|
|
440
|
-
},
|
|
572
|
+
// Drag's path lives in data.coordinates; the hosted sim leaves the
|
|
573
|
+
// top-level coordinates null for a drag.
|
|
574
|
+
coordinates: action.drag ? null : normalizedCoords,
|
|
575
|
+
data: Object.keys(actionData).length ? actionData : null,
|
|
441
576
|
order: i,
|
|
442
577
|
});
|
|
443
578
|
}
|
|
@@ -494,6 +629,24 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
494
629
|
// Native: drive FrameSourceType.ANDROID/IOS directly; browser falls
|
|
495
630
|
// back to screen_format server-side.
|
|
496
631
|
platform,
|
|
632
|
+
// Frame continuity: these describe the transition INTO this
|
|
633
|
+
// observation, produced by the PREVIOUS step's action. When that
|
|
634
|
+
// step was a pure scroll / non-submitting keyboard on a native
|
|
635
|
+
// device, the logical screen didn't change — tell the backend to
|
|
636
|
+
// reuse the previous frame instead of minting a new one off the
|
|
637
|
+
// shifted pixels. Carried from lastStepKind / lastFrameVersionId,
|
|
638
|
+
// updated AFTER this call for the next iteration.
|
|
639
|
+
...(isNative && lastFrameVersionId ? { previous_frame_version_id: lastFrameVersionId } : {}),
|
|
640
|
+
same_screen_continuation: isNative && (lastStepKind === "scroll" || lastStepKind === "keyboard"),
|
|
641
|
+
// Phase 2: scroll-invariant structural screen signature as an
|
|
642
|
+
// entry/cross-run anchor. Sent ONLY when usable (>= 2 stable chrome
|
|
643
|
+
// ids) — a sparse/empty id-set hashes to a colliding value that
|
|
644
|
+
// would silently over-merge distinct screens, so we omit it and let
|
|
645
|
+
// the backend fall back to Phase-1 continuity. Computed in the
|
|
646
|
+
// device's observe() from this step's parsed a11y tree.
|
|
647
|
+
...(isNative && obs.screenSignature?.usable
|
|
648
|
+
? { native_screen_signature: obs.screenSignature.value }
|
|
649
|
+
: {}),
|
|
497
650
|
});
|
|
498
651
|
frameVersionId = matchResult.frame_version_id;
|
|
499
652
|
}
|
|
@@ -501,6 +654,31 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
501
654
|
const msg = err instanceof Error ? err.message : String(err);
|
|
502
655
|
log(` Warning: frame matching failed — ${msg}`);
|
|
503
656
|
}
|
|
657
|
+
// Corpus dump (ISH_DUMP_CORPUS, native only): one JSON line per
|
|
658
|
+
// observation with everything needed to replay any screen-signature
|
|
659
|
+
// algorithm offline — the LLM screen label (ground truth), the coarse
|
|
660
|
+
// inputs, the exact parsed NativeNode[], the current algorithm's
|
|
661
|
+
// signature, the backend frame id, and the inbound action_kind. Fully
|
|
662
|
+
// gated and best-effort: zero overhead/behavior change when unset, and a
|
|
663
|
+
// dump failure never aborts the sim. Requires the native observe()'s
|
|
664
|
+
// optional nativeNodes/coarseInputs (browser leaves them undefined).
|
|
665
|
+
const corpusDumpPath = process.env.ISH_DUMP_CORPUS;
|
|
666
|
+
if (corpusDumpPath && isNative && obs.nativeNodes && obs.coarseInputs) {
|
|
667
|
+
appendCorpusDumpLine(corpusDumpPath, {
|
|
668
|
+
ts: step,
|
|
669
|
+
location: stepResponse.current_location,
|
|
670
|
+
coarse: obs.coarseInputs,
|
|
671
|
+
nodes: obs.nativeNodes,
|
|
672
|
+
signature: obs.screenSignature,
|
|
673
|
+
frameVersionId,
|
|
674
|
+
actionKind: inboundActionKind,
|
|
675
|
+
}, log);
|
|
676
|
+
}
|
|
677
|
+
// Carry THIS step's logical-screen classification + matched frame
|
|
678
|
+
// forward for the NEXT iteration's match-frame call (consumed above as
|
|
679
|
+
// last*). Classify after the call so ordering is consume-then-update.
|
|
680
|
+
lastStepKind = classifyStepKind(stepResponse.actions, perActionSuccess);
|
|
681
|
+
lastFrameVersionId = frameVersionId;
|
|
504
682
|
// Debug-only: capture post-action screenshot to show result
|
|
505
683
|
let postActionBase64;
|
|
506
684
|
if (isDebugEnabled()) {
|
|
@@ -520,7 +698,7 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
520
698
|
forwards.push({ type: "LOOP_DETECTED", content: "A repetitive action cycle was detected. Try a different approach." });
|
|
521
699
|
}
|
|
522
700
|
// Record interaction (1-indexed step for backend)
|
|
523
|
-
|
|
701
|
+
const interaction = {
|
|
524
702
|
step: step + 1,
|
|
525
703
|
assignment_id: assignment.id,
|
|
526
704
|
...(screenshotUrl ? { screenshot_url: screenshotUrl } : { screenshot_base64: obsBase64 }),
|
|
@@ -544,7 +722,24 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
544
722
|
// Server reduces this to Interaction.tab when N >= 2; omit on
|
|
545
723
|
// single-tab steps to keep the payload (and DB column) null.
|
|
546
724
|
...(tabsSnapshot.length >= 2 ? { tabs: tabsSnapshot } : {}),
|
|
547
|
-
}
|
|
725
|
+
};
|
|
726
|
+
// Keep the in-memory array for the debug HTML report.
|
|
727
|
+
interactions.push(interaction);
|
|
728
|
+
// Stream this interaction live so the backend persists + commits it
|
|
729
|
+
// immediately and fires INTERACTION_CREATED in realtime. A streaming
|
|
730
|
+
// failure must never abort the run — log and continue (the run-end
|
|
731
|
+
// finalize call still records the terminal state).
|
|
732
|
+
try {
|
|
733
|
+
await client.localSimRecordInteraction({
|
|
734
|
+
participant_id: session.participant_id,
|
|
735
|
+
product_id: session.product_id,
|
|
736
|
+
interaction,
|
|
737
|
+
});
|
|
738
|
+
}
|
|
739
|
+
catch (err) {
|
|
740
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
741
|
+
log(` Warning: failed to stream interaction ${step + 1} — ${msg}`);
|
|
742
|
+
}
|
|
548
743
|
// Update history for next step
|
|
549
744
|
history.push({
|
|
550
745
|
comment: stepResponse.comment,
|
|
@@ -635,7 +830,6 @@ async function runSingleSimulation(client, participantId, participantName, opts,
|
|
|
635
830
|
await client.localSimRecord({
|
|
636
831
|
participant_id: session.participant_id,
|
|
637
832
|
product_id: session.product_id,
|
|
638
|
-
interactions,
|
|
639
833
|
final_status: finalStatus,
|
|
640
834
|
assignment_statuses: assignmentStatuses,
|
|
641
835
|
});
|
|
@@ -45,6 +45,23 @@ export interface NativeNode {
|
|
|
45
45
|
/** True for nodes whose own text/desc is a label (used to aggregate onto rows). */
|
|
46
46
|
hasOwnLabel: boolean;
|
|
47
47
|
resourceId?: string;
|
|
48
|
+
/**
|
|
49
|
+
* True for a scroll container (Android `scrollable="true"`; iOS
|
|
50
|
+
* ScrollView/Table/CollectionView). The screen-signature uses it to keep the
|
|
51
|
+
* container's OWN id as durable chrome — see screen-signature.ts.
|
|
52
|
+
*/
|
|
53
|
+
scrollable: boolean;
|
|
54
|
+
/**
|
|
55
|
+
* True iff this node has a scrollable ANCESTOR — i.e. it is scroll CONTENT that
|
|
56
|
+
* shifts under a scroll. Computed STRUCTURALLY during parsing (tree ancestry),
|
|
57
|
+
* not geometrically: an overlay/FAB that merely sits inside a list's rect is
|
|
58
|
+
* NOT marked (it isn't a tree descendant), and on iOS the descendants of a
|
|
59
|
+
* pruned (isAccessible=0) scroll container still inherit the flag. The
|
|
60
|
+
* screen-signature excludes these from the stable token set so a scroll never
|
|
61
|
+
* changes the signature — see screen-signature.ts. A scroll container itself
|
|
62
|
+
* has `scrollable=true` but `insideScrollable=false` (unless nested).
|
|
63
|
+
*/
|
|
64
|
+
insideScrollable: boolean;
|
|
48
65
|
space: CoordinateSpace;
|
|
49
66
|
}
|
|
50
67
|
export interface NativeTree {
|
|
@@ -64,6 +81,13 @@ export interface NativeTree {
|
|
|
64
81
|
* raw fields; the serializer decides which to emit and how to aggregate.
|
|
65
82
|
*/
|
|
66
83
|
export declare function parseUiautomatorXml(xml: string): NativeNode[];
|
|
84
|
+
/**
|
|
85
|
+
* The foreground app's package name from a uiautomator dump's `package="..."`
|
|
86
|
+
* attribute. uiautomator stamps every node with the owning package; the first
|
|
87
|
+
* one is the foreground app. Used as a coarse-token input for the screen
|
|
88
|
+
* signature (see screen-signature.ts). Returns "" when absent (best-effort).
|
|
89
|
+
*/
|
|
90
|
+
export declare function androidPackage(xml: string): string;
|
|
67
91
|
/**
|
|
68
92
|
* Parse WDA's `GET /source?format=json` — a NESTED accessibility tree — into the
|
|
69
93
|
* FLAT, depth-first `NativeNode[]` (POINTS) that `parseXcuiHierarchy` produces,
|
|
@@ -122,7 +122,11 @@ function unescapeXml(s) {
|
|
|
122
122
|
export function parseUiautomatorXml(xml) {
|
|
123
123
|
const root = buildAndroidTree(xml);
|
|
124
124
|
const out = [];
|
|
125
|
-
|
|
125
|
+
// `parentScrollable` is true iff any ANCESTOR (not this node) had
|
|
126
|
+
// scrollable=true — i.e. this node is scroll CONTENT. Threaded down the
|
|
127
|
+
// descent so the screen-signature can exclude content structurally (a scroll
|
|
128
|
+
// moves these; chrome outside any scrollable keeps the signature stable).
|
|
129
|
+
const visit = (n, parentScrollable) => {
|
|
126
130
|
// Drop nodes with no usable bounds (malformed/zero-area) — they have no
|
|
127
131
|
// tappable center and would corrupt the nodeMap.
|
|
128
132
|
if (n.bounds) {
|
|
@@ -134,14 +138,20 @@ export function parseUiautomatorXml(xml) {
|
|
|
134
138
|
clickable: n.clickable,
|
|
135
139
|
hasOwnLabel: label.length > 0,
|
|
136
140
|
resourceId: n.resourceId || undefined,
|
|
141
|
+
scrollable: n.scrollable,
|
|
142
|
+
insideScrollable: parentScrollable,
|
|
137
143
|
space: "px",
|
|
138
144
|
});
|
|
139
145
|
}
|
|
146
|
+
// A node inside a scrollable makes ALL its descendants scroll content; the
|
|
147
|
+
// container's own flag stays false (it's durable chrome) but its children
|
|
148
|
+
// inherit true.
|
|
149
|
+
const childScrollable = parentScrollable || n.scrollable;
|
|
140
150
|
for (const c of n.children)
|
|
141
|
-
visit(c);
|
|
151
|
+
visit(c, childScrollable);
|
|
142
152
|
};
|
|
143
153
|
for (const c of root.children)
|
|
144
|
-
visit(c);
|
|
154
|
+
visit(c, false);
|
|
145
155
|
return out;
|
|
146
156
|
}
|
|
147
157
|
/**
|
|
@@ -151,7 +161,7 @@ export function parseUiautomatorXml(xml) {
|
|
|
151
161
|
* are its true descendants — required for ancestor-vs-leaf aggregation.
|
|
152
162
|
*/
|
|
153
163
|
function buildAndroidTree(xml) {
|
|
154
|
-
const root = makeRawAndroidNode("", "", "", "", false, null);
|
|
164
|
+
const root = makeRawAndroidNode("", "", "", "", false, false, null);
|
|
155
165
|
const stack = [root];
|
|
156
166
|
// Match every <node ...> / <node .../> open tag and standalone </node> close.
|
|
157
167
|
// Attribute values are consumed as atomic quoted runs (`"[^"]*"`) so a literal
|
|
@@ -171,19 +181,34 @@ function buildAndroidTree(xml) {
|
|
|
171
181
|
// the greedy run above swallows the trailing slash, so a `(\/?)` capture
|
|
172
182
|
// can't see it.
|
|
173
183
|
const selfClosing = tag.endsWith("/>");
|
|
174
|
-
const node = makeRawAndroidNode(attr(tag, "class"), attr(tag, "text"), attr(tag, "content-desc"), attr(tag, "resource-id"), attr(tag, "clickable") === "true", parseAndroidBounds(attr(tag, "bounds")));
|
|
184
|
+
const node = makeRawAndroidNode(attr(tag, "class"), attr(tag, "text"), attr(tag, "content-desc"), attr(tag, "resource-id"), attr(tag, "clickable") === "true", attr(tag, "scrollable") === "true", parseAndroidBounds(attr(tag, "bounds")));
|
|
175
185
|
stack[stack.length - 1].children.push(node);
|
|
176
186
|
if (!selfClosing)
|
|
177
187
|
stack.push(node);
|
|
178
188
|
}
|
|
179
189
|
return root;
|
|
180
190
|
}
|
|
181
|
-
function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, bounds) {
|
|
182
|
-
return { role, text, contentDesc, resourceId, clickable, bounds, children: [] };
|
|
191
|
+
function makeRawAndroidNode(role, text, contentDesc, resourceId, clickable, scrollable, bounds) {
    // Plain record for one raw <node>; every node starts with its own empty
    // children list, which the tree builder fills in as it descends.
    const node = {
        role: role,
        text: text,
        contentDesc: contentDesc,
        resourceId: resourceId,
        clickable: clickable,
        scrollable: scrollable,
        bounds: bounds,
        children: [],
    };
    return node;
}
|
|
194
|
+
/**
 * The foreground app's package name from a uiautomator dump's `package="..."`
 * attribute. uiautomator stamps every node with the owning package; the first
 * one is the foreground app. Used as a coarse-token input for the screen
 * signature (see screen-signature.ts). Returns "" when absent (best-effort).
 *
 * The scan up to `package=` consumes attribute values as atomic quoted runs
 * (`"[^"]*"`), the same way the tree builder tokenizes open tags, so a literal
 * `>` inside an earlier attribute value (e.g. `text="a > b"`) cannot end the
 * tag early and cause the first node's package to be missed.
 *
 * @param xml Raw uiautomator XML dump.
 * @returns The XML-unescaped package of the first `<node>` that carries one,
 *   or "" if no node has a `package` attribute.
 */
export function androidPackage(xml) {
    // Either a single non-quote, non-`>` character or one whole quoted value;
    // the two alternatives are disjoint on their first character.
    const m = /<node\b(?:[^>"]|"[^"]*")*?\spackage="([^"]*)"/.exec(xml);
    return m ? unescapeXml(m[1]) : "";
}
|
|
184
204
|
// ---------------------------------------------------------------------------
|
|
185
205
|
// iOS — shared helpers for the WebDriverAgent (XCUITest) /source parser below
|
|
186
206
|
// ---------------------------------------------------------------------------
|
|
207
|
+
/** iOS container types whose CONTENT scrolls. A node of one of these types (or
|
|
208
|
+
* any descendant of one) is marked `insideScrollable` so the screen signature
|
|
209
|
+
* excludes scroll content structurally while keeping the container's own id
|
|
210
|
+
* (see screen-signature.ts). */
|
|
211
|
+
const IOS_SCROLLABLE_TYPES = new Set(["ScrollView", "Table", "CollectionView"]);
|
|
187
212
|
/** iOS roles/types that are directly actionable (the device taps their center). */
|
|
188
213
|
const IOS_ACTIONABLE_TYPES = new Set([
|
|
189
214
|
"Button",
|
|
@@ -258,15 +283,47 @@ export function parseXcuiHierarchy(json) {
|
|
|
258
283
|
if (!root || typeof root !== "object")
|
|
259
284
|
return [];
|
|
260
285
|
const out = [];
|
|
261
|
-
|
|
286
|
+
// `parentScrollable` is true iff this node OR any ANCESTOR is a scroll
|
|
287
|
+
// container. CRITICAL (the M1 fix): WDA's scroll CONTAINER is isAccessible=0
|
|
288
|
+
// and therefore NOT emitted, but its descendants are scroll content all the
|
|
289
|
+
// same — so the flag is threaded down the recursion regardless of whether the
|
|
290
|
+
// container node itself is emitted. The screen-signature excludes these
|
|
291
|
+
// structurally, so a scroll never changes the iOS signature.
|
|
292
|
+
const visit = (n, parentScrollable) => {
|
|
293
|
+
const rawType = n.type ?? "";
|
|
294
|
+
const typeKey = stripAxPrefix(rawType);
|
|
295
|
+
const isScroll = IOS_SCROLLABLE_TYPES.has(typeKey);
|
|
262
296
|
const bounds = frameToBounds(n.rect ?? undefined);
|
|
297
|
+
// iOS NAVIGATION-BAR TITLE recovery. The bar carries the screen title in its
|
|
298
|
+
// `name`, but WDA marks the bar isAccessible=0 (so it's pruned) AND the large
|
|
299
|
+
// title StaticText scrolls WITH the content (insideScrollable). The title is
|
|
300
|
+
// then lost from the signature, silently OVER-MERGING distinct pushed screens
|
|
301
|
+
// (proven live: iOS Settings General/Accessibility/Privacy all reduced to the
|
|
302
|
+
// back button's parent label {tx:settings} → one frame). Emit the bar's name
|
|
303
|
+
// as a stable chrome node — it sits ABOVE the scroll (insideScrollable=false)
|
|
304
|
+
// and is scroll-invariant (constant as the large title collapses). Emitted
|
|
305
|
+
// first so `iosNavTitle` (find role==="navigationbar") sees the titled bar.
|
|
306
|
+
if (bounds && typeKey === "NavigationBar" && wdaTruthy(n.isVisible)) {
|
|
307
|
+
const navName = (n.name ?? "").trim();
|
|
308
|
+
if (navName) {
|
|
309
|
+
out.push({
|
|
310
|
+
role: normalizeRole(rawType),
|
|
311
|
+
label: navName,
|
|
312
|
+
bounds,
|
|
313
|
+
clickable: false,
|
|
314
|
+
hasOwnLabel: true,
|
|
315
|
+
resourceId: undefined,
|
|
316
|
+
scrollable: false,
|
|
317
|
+
insideScrollable: false,
|
|
318
|
+
space: "points",
|
|
319
|
+
});
|
|
320
|
+
}
|
|
321
|
+
}
|
|
263
322
|
if (bounds && wdaTruthy(n.isAccessible) && wdaTruthy(n.isVisible)) {
|
|
264
323
|
// Prefer the spoken label; fall back to a STRING value (search fields
|
|
265
324
|
// expose their placeholder as `value`). Non-string values (a Switch's 1/0)
|
|
266
325
|
// are ignored for the label, exactly like the idb path.
|
|
267
326
|
const label = (n.label ?? (typeof n.value === "string" ? n.value : "")).trim();
|
|
268
|
-
const rawType = n.type ?? "";
|
|
269
|
-
const typeKey = stripAxPrefix(rawType);
|
|
270
327
|
// `isEnabled` absent ⇒ assume enabled (WDA omits it on always-enabled types).
|
|
271
328
|
const enabled = n.isEnabled == null ? true : wdaTruthy(n.isEnabled);
|
|
272
329
|
const actionable = IOS_ACTIONABLE_TYPES.has(typeKey) && enabled;
|
|
@@ -277,16 +334,21 @@ export function parseXcuiHierarchy(json) {
|
|
|
277
334
|
clickable: actionable,
|
|
278
335
|
hasOwnLabel: label.length > 0,
|
|
279
336
|
resourceId: (n.name || n.rawIdentifier) ?? undefined,
|
|
337
|
+
scrollable: isScroll,
|
|
338
|
+
insideScrollable: parentScrollable,
|
|
280
339
|
space: "points",
|
|
281
340
|
});
|
|
282
341
|
}
|
|
283
342
|
// Recurse into ALL children — an accessible element can nest inside a
|
|
284
|
-
// non-accessible container (the Cell wrapping the Button
|
|
285
|
-
// prune the walk by accessibility, only
|
|
343
|
+
// non-accessible container (the Cell wrapping the Button, or the pruned
|
|
344
|
+
// scroll container), so we must not prune the walk by accessibility, only
|
|
345
|
+
// the emission. The scroll flag propagates onto descendants even though the
|
|
346
|
+
// container itself wasn't emitted.
|
|
347
|
+
const childScrollable = parentScrollable || isScroll;
|
|
286
348
|
for (const c of n.children ?? [])
|
|
287
|
-
visit(c);
|
|
349
|
+
visit(c, childScrollable);
|
|
288
350
|
};
|
|
289
|
-
visit(root);
|
|
351
|
+
visit(root, false);
|
|
290
352
|
return out;
|
|
291
353
|
}
|
|
292
354
|
// ---------------------------------------------------------------------------
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native "screen signature" v2 — a SCROLL-INVARIANT structural identity for a
|
|
3
|
+
* logical native screen, derived from the accessibility tree, sent to the
|
|
4
|
+
* backend as an entry/cross-run frame anchor (Phase 2 of native frame
|
|
5
|
+
* continuity; Phase 1 reuses the prior frame on pure scroll/keyboard steps).
|
|
6
|
+
*
|
|
7
|
+
* FCIS: this module is PURE (NativeNode[] + coarse inputs in, signature out) —
|
|
8
|
+
* no device access. The device gathers the coarse inputs (foreground activity /
|
|
9
|
+
* bundle id) and the parsed tree; this turns them into `{value, usable}`.
|
|
10
|
+
*
|
|
11
|
+
* The signature has two parts:
|
|
12
|
+
* coarse — a cheap, almost-always-available anchor (android `package|activity`,
|
|
13
|
+
* ios `bundleId|navTitle`).
|
|
14
|
+
* tokens — the persistent CHROME tokens that are NOT scroll content. Each
|
|
15
|
+
* chrome node contributes its resource-id (`id:…`) AND its label
|
|
16
|
+
* (`tx:…`) when present. This is what makes the signature
|
|
17
|
+
* scroll-invariant AND lets two same-activity screens be told apart.
|
|
18
|
+
*
|
|
19
|
+
* WHY v2 (two verified gaps in the id-only v1):
|
|
20
|
+
* 1. LABELS close the shared-chrome OVER-MERGE. A single-Activity app — Jetpack
|
|
21
|
+
* Compose (exposes NO resource-ids beyond the framework `android:id/content`)
|
|
22
|
+
* or a View app with a fixed toolbar+container shared across fragments —
|
|
23
|
+
* gives two DISTINCT screens the SAME id-set → identical signature → SILENT
|
|
24
|
+
* over-merge (the cardinal failure). But those screens DO differ in chrome
|
|
25
|
+
* LABELS (a home screen vs a settings sub-screen show different toolbar /
|
|
26
|
+
* button text). Including labels makes distinct screens produce distinct
|
|
27
|
+
* signatures, and makes Compose usable at all (label-only tokens).
|
|
28
|
+
* 2. STRUCTURAL scroll-exclusion replaces v1's geometric `contains()`. v1
|
|
29
|
+
* excluded scroll content by bounds-containment, which (a) mis-flagged an
|
|
30
|
+
* overlay/FAB sitting inside a list's rect as content (→ could over-merge),
|
|
31
|
+
* and (b) on iOS the scroll CONTAINER is isAccessible=0 and pruned from the
|
|
32
|
+
* NativeNode[], so geometric exclusion never fired (scroll changed the
|
|
33
|
+
* signature → over-split, feature inert). v2 excludes by TREE STRUCTURE: a
|
|
34
|
+
* node is content iff `insideScrollable` (it has a scrollable ANCESTOR),
|
|
35
|
+
* computed during parsing — see native-a11y.ts. The scroll container's OWN
|
|
36
|
+
* tokens are kept (it's durable chrome; `insideScrollable` is about
|
|
37
|
+
* descendants).
|
|
38
|
+
*
|
|
39
|
+
* The remaining failure mode after v2 is SAFE: dynamic chrome labels (a live
|
|
40
|
+
* clock, an unread badge) cause OVER-SPLIT (a new frame), never over-merge — the
|
|
41
|
+
* backend just mints a fresh frame, which is the conservative direction.
|
|
42
|
+
*
|
|
43
|
+
* USABLE GUARD (load-bearing, unchanged in spirit): `usable` is true only with
|
|
44
|
+
* >= MIN_STABLE_TOKENS tokens. A signature derived from an empty/sparse token
|
|
45
|
+
* set must NEVER be sent — sha1("") (and any near-empty set) collides across
|
|
46
|
+
* distinct screens and would silently over-merge them. When unusable the caller
|
|
47
|
+
* omits the field entirely and the backend falls back to Phase-1 continuity.
|
|
48
|
+
* This is the SAFE default: Flutter (no a11y tree) and the sparsest screens
|
|
49
|
+
* degrade here; id-rich Android and label-rich Compose are the validated wins.
|
|
50
|
+
*/
|
|
51
|
+
import type { NativeNode } from "./native-a11y.js";
|
|
52
|
+
/** Minimum stable-chrome tokens for a signature to be usable (sent to the backend). */
|
|
53
|
+
export declare const MIN_STABLE_TOKENS = 2;
|
|
54
|
+
/** Coarse-token inputs gathered from the device (cheap, almost-always-available). */
|
|
55
|
+
export interface CoarseInputs {
|
|
56
|
+
platform: "android" | "ios";
|
|
57
|
+
/** Android: foreground app package (uiautomator `package` attr). */
|
|
58
|
+
package?: string;
|
|
59
|
+
/** Android: foreground activity (`pkg/activity` from dumpsys). */
|
|
60
|
+
activity?: string;
|
|
61
|
+
/** iOS: active app bundle id (WDA /wda/activeAppInfo). navTitle is derived here. */
|
|
62
|
+
bundleId?: string;
|
|
63
|
+
}
|
|
64
|
+
export interface ScreenSignature {
|
|
65
|
+
/** `platform|coarse|sha1(tokens)` — the value sent as native_screen_signature. */
|
|
66
|
+
value: string;
|
|
67
|
+
/** True only with >= MIN_STABLE_TOKENS tokens; the caller omits the field when false. */
|
|
68
|
+
usable: boolean;
|
|
69
|
+
/** Number of stable chrome tokens — the guard's basis. */
|
|
70
|
+
tokenCount: number;
|
|
71
|
+
}
|
|
72
|
+
/**
|
|
73
|
+
* Compute the screen signature from this step's parsed tree + coarse inputs.
|
|
74
|
+
* `value` is `platform|coarse|sha1(tokens)`; `usable` gates whether it's safe to
|
|
75
|
+
* send (>= MIN_STABLE_TOKENS distinct stable chrome tokens).
|
|
76
|
+
*/
|
|
77
|
+
export declare function computeScreenSignature(nodes: NativeNode[], coarse: CoarseInputs): ScreenSignature;
|