@ishlabs/cli 0.13.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/iteration.js +219 -22
- package/dist/commands/profile.js +75 -9
- package/dist/commands/source.js +6 -4
- package/dist/commands/study-run.js +382 -34
- package/dist/commands/study.js +170 -9
- package/dist/commands/workspace.js +35 -2
- package/dist/lib/accessibility-profile.d.ts +12 -0
- package/dist/lib/accessibility-profile.js +136 -0
- package/dist/lib/ask-questions.js +9 -0
- package/dist/lib/billing.d.ts +55 -0
- package/dist/lib/billing.js +77 -0
- package/dist/lib/docs.js +1106 -36
- package/dist/lib/enums.d.ts +54 -0
- package/dist/lib/enums.js +100 -0
- package/dist/lib/local-sim/actions.d.ts +2 -1
- package/dist/lib/local-sim/actions.js +88 -13
- package/dist/lib/local-sim/loop.js +49 -19
- package/dist/lib/local-sim/tabs.d.ts +27 -0
- package/dist/lib/local-sim/tabs.js +157 -0
- package/dist/lib/local-sim/types.d.ts +15 -0
- package/dist/lib/modality.d.ts +70 -1
- package/dist/lib/modality.js +323 -17
- package/dist/lib/output.js +61 -4
- package/dist/lib/skill-content.js +382 -19
- package/dist/lib/types.d.ts +6 -1
- package/package.json +1 -1
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enum-value normalisation for CLI flags.
|
|
3
|
+
*
|
|
4
|
+
* Agents (and humans) reach for hyphen-style values on the command line —
|
|
5
|
+
* `--screen-format mobile-portrait`, `--kind text-file`, `--chat-mode
|
|
6
|
+
* tester-pair` — even when the canonical backend value is underscored (or
|
|
7
|
+
* vice versa for the ask-question `type` field, which is hyphenated). Rather
|
|
8
|
+
* than fail with a 422, each parse site funnels the raw value through
|
|
9
|
+
* `normalizeEnumValue` and gets back the canonical form (or `null` for a
|
|
10
|
+
* genuine typo, so the caller can throw a clean ValidationError).
|
|
11
|
+
*
|
|
12
|
+
* Scope: apply this only to flags whose allowed values are explicitly enumerated
|
|
13
|
+
* client-side. Don't blanket-rewrite arbitrary strings — that would mask real
|
|
14
|
+
* typos (`mobil_portrait` ≠ `mobile_portrait`) and silently coerce values the
|
|
15
|
+
* backend hasn't agreed to.
|
|
16
|
+
*/
|
|
17
|
+
/**
|
|
18
|
+
* Case-insensitively match `raw` against `allowed`, treating hyphens and
|
|
19
|
+
* underscores as interchangeable. Returns the canonical form from `allowed`
|
|
20
|
+
* if matched, `null` otherwise.
|
|
21
|
+
*/
|
|
22
|
+
export declare function normalizeEnumValue<T extends string>(raw: string | undefined | null, allowed: readonly T[]): T | null;
|
|
23
|
+
/** Interactive-iteration screen format. Canonical underscored. */
|
|
24
|
+
export declare const SCREEN_FORMATS: readonly ["desktop", "mobile_portrait"];
|
|
25
|
+
export type ScreenFormat = typeof SCREEN_FORMATS[number];
|
|
26
|
+
/**
|
|
27
|
+
* Interview-question types. Canonical is **hyphenated** for the multi-word
|
|
28
|
+
* values (`single-choice`, `multiple-choice`) — that's what the backend
|
|
29
|
+
* accepts. The normaliser folds underscored variants back to the canonical
|
|
30
|
+
* hyphenated form.
|
|
31
|
+
*/
|
|
32
|
+
export declare const QUESTION_TYPES: readonly ["text", "slider", "likert", "single-choice", "multiple-choice", "number"];
|
|
33
|
+
export type QuestionType = typeof QUESTION_TYPES[number];
|
|
34
|
+
/**
|
|
35
|
+
* TesterProfile structured enums (profile-enums.v1.json). Values are
|
|
36
|
+
* snake_case and match the spec byte-for-byte; agents pass them verbatim
|
|
37
|
+
* via CLI flags.
|
|
38
|
+
*/
|
|
39
|
+
export declare const EDUCATION_LEVELS: readonly ["less_than_secondary", "secondary", "some_post_secondary", "vocational_or_associate", "bachelor", "graduate"];
|
|
40
|
+
export type EducationLevel = typeof EDUCATION_LEVELS[number];
|
|
41
|
+
export declare const HOUSEHOLDS: readonly ["single", "couple_no_kids", "couple_with_kids", "single_parent", "shared_housing", "adult_with_parents", "multi_generational"];
|
|
42
|
+
export type Household = typeof HOUSEHOLDS[number];
|
|
43
|
+
export declare const LOCALE_TYPES: readonly ["urban", "suburban", "small_town", "rural"];
|
|
44
|
+
export type LocaleType = typeof LOCALE_TYPES[number];
|
|
45
|
+
export declare const INCOME_LEVELS: readonly ["lower", "lower_middle", "middle", "upper_middle", "upper", "prefer_not_to_say"];
|
|
46
|
+
export type IncomeLevel = typeof INCOME_LEVELS[number];
|
|
47
|
+
export declare const EMPLOYMENT_STATUSES: readonly ["employed_full_time", "employed_part_time", "self_employed", "unemployed_seeking", "student", "homemaker", "retired", "unable_to_work", "other"];
|
|
48
|
+
export type EmploymentStatus = typeof EMPLOYMENT_STATUSES[number];
|
|
49
|
+
/**
|
|
50
|
+
* Strict enum check for CLI flag values. Snake_case-only; no hyphen folding,
|
|
51
|
+
* because the spec values are the wire format and agents are expected to
|
|
52
|
+
* pass them verbatim.
|
|
53
|
+
*/
|
|
54
|
+
export declare function assertEnumValue<T extends string>(raw: string, allowed: readonly T[], flagName: string): T;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enum-value normalisation for CLI flags.
|
|
3
|
+
*
|
|
4
|
+
* Agents (and humans) reach for hyphen-style values on the command line —
|
|
5
|
+
* `--screen-format mobile-portrait`, `--kind text-file`, `--chat-mode
|
|
6
|
+
* tester-pair` — even when the canonical backend value is underscored (or
|
|
7
|
+
* vice versa for the ask-question `type` field, which is hyphenated). Rather
|
|
8
|
+
* than fail with a 422, each parse site funnels the raw value through
|
|
9
|
+
* `normalizeEnumValue` and gets back the canonical form (or `null` for a
|
|
10
|
+
* genuine typo, so the caller can throw a clean ValidationError).
|
|
11
|
+
*
|
|
12
|
+
* Scope: apply this only to flags whose allowed values are explicitly enumerated
|
|
13
|
+
* client-side. Don't blanket-rewrite arbitrary strings — that would mask real
|
|
14
|
+
* typos (`mobil_portrait` ≠ `mobile_portrait`) and silently coerce values the
|
|
15
|
+
* backend hasn't agreed to.
|
|
16
|
+
*/
|
|
17
|
+
/**
|
|
18
|
+
* Case-insensitively match `raw` against `allowed`, treating hyphens and
|
|
19
|
+
* underscores as interchangeable. Returns the canonical form from `allowed`
|
|
20
|
+
* if matched, `null` otherwise.
|
|
21
|
+
*/
|
|
22
|
+
export function normalizeEnumValue(raw, allowed) {
    // A missing flag value can never match an allowed entry.
    if (raw == null) {
        return null;
    }
    // Comparison key: lower-cased, with every hyphen folded to an underscore.
    const fold = (value) => value.toLowerCase().replace(/-/g, "_");
    // Surrounding whitespace on the raw input is ignored before folding.
    const wanted = fold(String(raw).trim());
    // First allowed entry whose folded form matches wins; it is returned in
    // its canonical spelling, exactly as listed in `allowed`.
    const canonical = allowed.find((candidate) => fold(candidate) === wanted);
    return canonical ?? null;
}
|
|
32
|
+
/** Interactive-iteration screen format. Canonical underscored. */
|
|
33
|
+
export const SCREEN_FORMATS = ["desktop", "mobile_portrait"];
|
|
34
|
+
/**
|
|
35
|
+
* Interview-question types. Canonical is **hyphenated** for the multi-word
|
|
36
|
+
* values (`single-choice`, `multiple-choice`) — that's what the backend
|
|
37
|
+
* accepts. The normaliser folds underscored variants back to the canonical
|
|
38
|
+
* hyphenated form.
|
|
39
|
+
*/
|
|
40
|
+
export const QUESTION_TYPES = ["text", "slider", "likert", "single-choice", "multiple-choice", "number"];
|
|
48
|
+
/**
|
|
49
|
+
* TesterProfile structured enums (profile-enums.v1.json). Values are
|
|
50
|
+
* snake_case and match the spec byte-for-byte; agents pass them verbatim
|
|
51
|
+
* via CLI flags.
|
|
52
|
+
*/
|
|
53
|
+
export const EDUCATION_LEVELS = [
    "less_than_secondary", "secondary", "some_post_secondary",
    "vocational_or_associate", "bachelor", "graduate",
];
|
|
61
|
+
export const HOUSEHOLDS = [
    "single", "couple_no_kids", "couple_with_kids", "single_parent",
    "shared_housing", "adult_with_parents", "multi_generational",
];
|
|
70
|
+
export const LOCALE_TYPES = ["urban", "suburban", "small_town", "rural"];
|
|
71
|
+
export const INCOME_LEVELS = [
    "lower", "lower_middle", "middle",
    "upper_middle", "upper", "prefer_not_to_say",
];
|
|
79
|
+
export const EMPLOYMENT_STATUSES = [
    "employed_full_time", "employed_part_time", "self_employed",
    "unemployed_seeking", "student", "homemaker",
    "retired", "unable_to_work", "other",
];
|
|
90
|
+
/**
|
|
91
|
+
* Strict enum check for CLI flag values. Snake_case-only; no hyphen folding,
|
|
92
|
+
* because the spec values are the wire format and agents are expected to
|
|
93
|
+
* pass them verbatim.
|
|
94
|
+
*/
|
|
95
|
+
export function assertEnumValue(raw, allowed, flagName) {
    // Exact, case-sensitive membership: no trimming and no hyphen folding.
    if (allowed.includes(raw)) {
        return raw;
    }
    // Name the offending flag and list every accepted value in the error.
    const choices = allowed.join(", ");
    throw new Error(`Invalid ${flagName}: "${raw}". Allowed values: ${choices}.`);
}
|
|
@@ -8,10 +8,11 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { Page } from "playwright-core";
|
|
10
10
|
import type { LocalStepAction, ActionResult, ContextValue, TreeData } from "./types.js";
|
|
11
|
+
import type { TabManager } from "./tabs.js";
|
|
11
12
|
/**
|
|
12
13
|
* Execute a single action on the page.
|
|
13
14
|
*/
|
|
14
|
-
export declare function executeAction(page: Page, action: LocalStepAction, treeData: TreeData, contextValues: ContextValue[]): Promise<ActionResult>;
|
|
15
|
+
export declare function executeAction(page: Page, action: LocalStepAction, treeData: TreeData, contextValues: ContextValue[], tabs?: TabManager): Promise<ActionResult>;
|
|
15
16
|
/**
|
|
16
17
|
* Compare two base64 screenshots to detect visible change.
|
|
17
18
|
*/
|
|
@@ -8,6 +8,37 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import { resolveNodeToBoundingBox } from "./browser.js";
|
|
10
10
|
import { isDebugEnabled } from "./debug.js";
|
|
11
|
+
// Agent-facing modifier names → Playwright keyboard modifier names.
// Mirrors backend `_PLAYWRIGHT_MODIFIERS` in
// ish-backend/app/simulation/executors/browser.py.
const PLAYWRIGHT_MODIFIERS = {
    cmd: "Meta", ctrl: "Control", shift: "Shift", alt: "Alt",
};
/**
 * Translate agent-supplied modifier names into Playwright key names.
 *
 * Matching is case-insensitive. Names that are not own keys of
 * `PLAYWRIGHT_MODIFIERS` are dropped silently.
 *
 * @param mods Modifier names such as `["cmd", "shift"]`; may be null,
 *   undefined or empty.
 * @returns The mapped Playwright key names in input order (possibly empty).
 */
function toPWModifiers(mods) {
    if (!mods?.length)
        return [];
    const out = [];
    for (const m of mods) {
        const name = m.toLowerCase();
        // Own-key check: a bare `PLAYWRIGHT_MODIFIERS[name]` lookup would also
        // resolve inherited members such as `constructor` or `__proto__`,
        // which are truthy and would be pushed as if they were key names.
        if (Object.prototype.hasOwnProperty.call(PLAYWRIGHT_MODIFIERS, name))
            out.push(PLAYWRIGHT_MODIFIERS[name]);
    }
    return out;
}
|
|
28
|
+
async function withModifiers(page, mods, fn) {
|
|
29
|
+
if (!mods.length)
|
|
30
|
+
return fn();
|
|
31
|
+
for (const m of mods)
|
|
32
|
+
await page.keyboard.down(m);
|
|
33
|
+
try {
|
|
34
|
+
return await fn();
|
|
35
|
+
}
|
|
36
|
+
finally {
|
|
37
|
+
for (const m of [...mods].reverse()) {
|
|
38
|
+
await page.keyboard.up(m).catch(() => { });
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
11
42
|
// --- ARIA role → Playwright role mapping ---
|
|
12
43
|
const ELEMENT_TYPE_TO_ROLE = {
|
|
13
44
|
BUTTON: "button",
|
|
@@ -26,7 +57,7 @@ const ELEMENT_TYPE_TO_ROLE = {
|
|
|
26
57
|
/**
|
|
27
58
|
* Execute a single action on the page.
|
|
28
59
|
*/
|
|
29
|
-
export async function executeAction(page, action, treeData, contextValues) {
|
|
60
|
+
export async function executeAction(page, action, treeData, contextValues, tabs) {
|
|
30
61
|
try {
|
|
31
62
|
// Intercept "back button" taps — the LLM often tries to tap the browser
|
|
32
63
|
// back button which doesn't exist in the viewport. Convert to page.goBack().
|
|
@@ -71,6 +102,18 @@ export async function executeAction(page, action, treeData, contextValues) {
|
|
|
71
102
|
case "think":
|
|
72
103
|
// No-op: model is reasoning without acting
|
|
73
104
|
break;
|
|
105
|
+
case "keyboard_shortcut":
|
|
106
|
+
await executeKeyboardShortcut(page, action);
|
|
107
|
+
break;
|
|
108
|
+
case "switch_tab":
|
|
109
|
+
case "close_tab":
|
|
110
|
+
if (tabs && action.tab_id) {
|
|
111
|
+
if (action.type === "switch_tab")
|
|
112
|
+
await tabs.switchTab(action.tab_id);
|
|
113
|
+
else
|
|
114
|
+
await tabs.closeTab(action.tab_id);
|
|
115
|
+
}
|
|
116
|
+
break;
|
|
74
117
|
case "pinch_zoom":
|
|
75
118
|
case "rotate_device":
|
|
76
119
|
// Not supported in desktop browser
|
|
@@ -178,9 +221,12 @@ async function executeTap(page, action, treeData) {
|
|
|
178
221
|
const count = action.count ?? 1;
|
|
179
222
|
const coords = await resolveElement(page, action, treeData);
|
|
180
223
|
if (coords) {
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
224
|
+
const pwMods = toPWModifiers(action.modifiers);
|
|
225
|
+
await withModifiers(page, pwMods, async () => {
|
|
226
|
+
for (let i = 0; i < count; i++) {
|
|
227
|
+
await page.mouse.click(coords.x, coords.y);
|
|
228
|
+
}
|
|
229
|
+
});
|
|
184
230
|
return coords;
|
|
185
231
|
}
|
|
186
232
|
else {
|
|
@@ -295,22 +341,40 @@ async function executeLongPress(page, action, treeData) {
|
|
|
295
341
|
const coords = await resolveElement(page, action, treeData);
|
|
296
342
|
if (!coords)
|
|
297
343
|
throw new Error(`Cannot locate element for long press: ${action.element_name ?? "unknown"}`);
|
|
298
|
-
|
|
299
|
-
await page
|
|
300
|
-
|
|
301
|
-
|
|
344
|
+
const pwMods = toPWModifiers(action.modifiers);
|
|
345
|
+
await withModifiers(page, pwMods, async () => {
|
|
346
|
+
await page.mouse.move(coords.x, coords.y);
|
|
347
|
+
await page.mouse.down();
|
|
348
|
+
await page.waitForTimeout(action.duration_ms ?? 500);
|
|
349
|
+
await page.mouse.up();
|
|
350
|
+
});
|
|
302
351
|
return coords;
|
|
303
352
|
}
|
|
304
353
|
async function executeDoubleTap(page, action, treeData) {
|
|
305
354
|
const coords = await resolveElement(page, action, treeData);
|
|
306
355
|
if (coords) {
|
|
307
|
-
|
|
356
|
+
const pwMods = toPWModifiers(action.modifiers);
|
|
357
|
+
await withModifiers(page, pwMods, async () => {
|
|
358
|
+
await page.mouse.dblclick(coords.x, coords.y);
|
|
359
|
+
});
|
|
308
360
|
return coords;
|
|
309
361
|
}
|
|
310
362
|
else {
|
|
311
363
|
throw new Error(`Cannot locate element for double tap: ${action.element_name ?? "unknown"}`);
|
|
312
364
|
}
|
|
313
365
|
}
|
|
366
|
+
/**
 * Send a key combination to whatever element currently has focus.
 * Counterpart of the backend's BrowserActionExecutor.execute_keyboard_shortcut.
 *
 * The combination is passed to `page.keyboard.press` as the mapped modifier
 * names and the key joined with "+"; with no mapped modifiers the key is
 * passed on its own.
 *
 * @param page Page whose keyboard receives the key press.
 * @param action Action carrying `key` and optional `modifiers`.
 * @throws Error when `action.key` is missing or empty.
 */
async function executeKeyboardShortcut(page, action) {
    const { key, modifiers } = action;
    if (!key) {
        throw new Error("keyboard_shortcut missing key");
    }
    const held = toPWModifiers(modifiers);
    const combo = held.length === 0 ? key : [...held, key].join("+");
    await page.keyboard.press(combo);
}
|
|
314
378
|
// --- Helpers ---
|
|
315
379
|
/**
|
|
316
380
|
* Resolve the actual text to type from an action, handling var/secret value types.
|
|
@@ -343,11 +407,12 @@ export function detectNoVisibleChange(before, after) {
|
|
|
343
407
|
*/
|
|
344
408
|
export function describeAction(action) {
|
|
345
409
|
const element = action.element_name || "element";
|
|
410
|
+
const modSuffix = action.modifiers?.length ? ` [${action.modifiers.join("+")}]` : "";
|
|
346
411
|
switch (action.type) {
|
|
347
412
|
case "tap":
|
|
348
413
|
return action.count && action.count > 1
|
|
349
|
-
? `tap on '${element}' x${action.count}`
|
|
350
|
-
: `tap on '${element}'`;
|
|
414
|
+
? `tap on '${element}' x${action.count}${modSuffix}`
|
|
415
|
+
: `tap on '${element}'${modSuffix}`;
|
|
351
416
|
case "text_input": {
|
|
352
417
|
const val = action.value_type === "secret" ? "***" : `"${(action.value ?? "").slice(0, 30)}"`;
|
|
353
418
|
const modeStr = action.mode ? ` (${action.mode}${action.submit ? ", submit" : ""})` : "";
|
|
@@ -364,15 +429,25 @@ export function describeAction(action) {
|
|
|
364
429
|
case "navigate_back":
|
|
365
430
|
return "navigate back";
|
|
366
431
|
case "long_press":
|
|
367
|
-
return `long_press on '${element}'`;
|
|
432
|
+
return `long_press on '${element}'${modSuffix}`;
|
|
368
433
|
case "double_tap":
|
|
369
|
-
return `double_tap on '${element}'`;
|
|
434
|
+
return `double_tap on '${element}'${modSuffix}`;
|
|
370
435
|
case "drag":
|
|
371
436
|
return `drag '${element}'`;
|
|
372
437
|
case "think":
|
|
373
438
|
return `think: "${(action.thoughts ?? "").slice(0, 50)}"`;
|
|
374
439
|
case "pull_to_refresh":
|
|
375
440
|
return "pull_to_refresh";
|
|
441
|
+
case "keyboard_shortcut": {
|
|
442
|
+
const combo = action.modifiers?.length
|
|
443
|
+
? `${action.modifiers.join("+")}+${action.key ?? "?"}`
|
|
444
|
+
: (action.key ?? "?");
|
|
445
|
+
return `keyboard_shortcut '${combo}'`;
|
|
446
|
+
}
|
|
447
|
+
case "switch_tab":
|
|
448
|
+
return `switch_tab '${action.tab_id ?? "?"}'`;
|
|
449
|
+
case "close_tab":
|
|
450
|
+
return `close_tab '${action.tab_id ?? "?"}'`;
|
|
376
451
|
default:
|
|
377
452
|
return `${action.type} on '${element}'`;
|
|
378
453
|
}
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
import { launchBrowser, launchSharedBrowser, createTab, captureObservation, takeScreenshot, takeScreenshotJpeg, navigateWithRetry, closeBrowser } from "./browser.js";
|
|
8
8
|
import { uploadScreenshot } from "./upload.js";
|
|
9
9
|
import { executeAction, detectNoVisibleChange, describeAction } from "./actions.js";
|
|
10
|
+
import { TabManager } from "./tabs.js";
|
|
10
11
|
import { enableDebug, isDebugEnabled, debugObservation, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
|
|
11
12
|
/**
|
|
12
13
|
* Convert a raw action (from either resolved_actions or output.action.actions)
|
|
@@ -32,6 +33,9 @@ function flattenAction(raw, nodeId = null, nodeDescription = null) {
|
|
|
32
33
|
count: a.count ?? null,
|
|
33
34
|
duration_ms: a.duration_ms ?? null,
|
|
34
35
|
thoughts: a.thoughts ?? null,
|
|
36
|
+
modifiers: Array.isArray(a.modifiers) ? a.modifiers : null,
|
|
37
|
+
key: a.key ?? null,
|
|
38
|
+
tab_id: a.tab_id ?? null,
|
|
35
39
|
};
|
|
36
40
|
}
|
|
37
41
|
/**
|
|
@@ -199,7 +203,10 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
199
203
|
const browserSession = sharedBrowser
|
|
200
204
|
? await createTab(sharedBrowser, browserOpts)
|
|
201
205
|
: await launchBrowser(browserOpts);
|
|
202
|
-
|
|
206
|
+
// Active page can swap when a popup auto-focuses or the LLM issues
|
|
207
|
+
// switch_tab/close_tab. TabManager wires the context popup listener.
|
|
208
|
+
const tabs = new TabManager(browserSession.context, browserSession.page);
|
|
209
|
+
let page = tabs.activePage();
|
|
203
210
|
const history = [];
|
|
204
211
|
const interactions = [];
|
|
205
212
|
const debugSteps = [];
|
|
@@ -218,7 +225,8 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
218
225
|
let step = 0;
|
|
219
226
|
let assignmentCompleted = false;
|
|
220
227
|
while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
|
|
221
|
-
// OBSERVE
|
|
228
|
+
// OBSERVE — refresh active page in case a popup or switch_tab changed it
|
|
229
|
+
page = tabs.activePage();
|
|
222
230
|
const obs = await captureObservation(page);
|
|
223
231
|
const lastTreeData = obs.treeData;
|
|
224
232
|
const currentScreenshot = obs.screenshot;
|
|
@@ -236,6 +244,9 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
236
244
|
if (forwards.length > 0)
|
|
237
245
|
debugForwards(forwards);
|
|
238
246
|
const viewportSize = page.viewportSize() ?? viewport;
|
|
247
|
+
// Snapshot open tabs so the backend can prompt the LLM with tab ids
|
|
248
|
+
// (used by switch_tab/close_tab and to disambiguate cmd+click results).
|
|
249
|
+
const tabsSnapshot = await tabs.list();
|
|
239
250
|
// REASON (remote)
|
|
240
251
|
let stepResponse;
|
|
241
252
|
try {
|
|
@@ -258,6 +269,7 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
258
269
|
agent_model: session.agent_model,
|
|
259
270
|
dom_model: session.dom_model,
|
|
260
271
|
llm_provider: session.llm_provider,
|
|
272
|
+
tabs: tabsSnapshot,
|
|
261
273
|
};
|
|
262
274
|
stepResponse = normalizeStepResponse(await client.localSimStep(stepReqBody));
|
|
263
275
|
}
|
|
@@ -285,6 +297,7 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
285
297
|
agent_model: session.agent_model,
|
|
286
298
|
dom_model: session.dom_model,
|
|
287
299
|
llm_provider: session.llm_provider,
|
|
300
|
+
tabs: tabsSnapshot,
|
|
288
301
|
};
|
|
289
302
|
stepResponse = normalizeStepResponse(await client.localSimStep(stepReqBody));
|
|
290
303
|
}
|
|
@@ -304,10 +317,17 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
304
317
|
for (let i = 0; i < stepResponse.actions.length; i++) {
|
|
305
318
|
if (isCancelled())
|
|
306
319
|
break;
|
|
320
|
+
// Pick up popup auto-switch / explicit tab switch from prior actions.
|
|
321
|
+
page = tabs.activePage();
|
|
307
322
|
const action = stepResponse.actions[i];
|
|
308
|
-
const
|
|
323
|
+
const tabsBefore = (await tabs.list()).length;
|
|
324
|
+
const result = await executeAction(page, action, lastTreeData, session.context_values, tabs);
|
|
309
325
|
const desc = describeAction(action);
|
|
310
326
|
debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
|
|
327
|
+
// The action may have flipped the active tab — re-read.
|
|
328
|
+
page = tabs.activePage();
|
|
329
|
+
const tabsAfter = (await tabs.list()).length;
|
|
330
|
+
const openedNewTab = action.type === "tap" && tabsAfter > tabsBefore;
|
|
311
331
|
let normalizedCoords = null;
|
|
312
332
|
if (result.coordinates) {
|
|
313
333
|
const vp = page.viewportSize() ?? viewport;
|
|
@@ -317,22 +337,29 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
317
337
|
};
|
|
318
338
|
}
|
|
319
339
|
const actionType = action.type || "unknown";
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
340
|
+
const INTERNAL_ACTIONS = new Set(["think"]);
|
|
341
|
+
if (!INTERNAL_ACTIONS.has(actionType)) {
|
|
342
|
+
actionDatas.push({
|
|
343
|
+
action_type: actionType,
|
|
344
|
+
element_label: action.element_name ?? null,
|
|
345
|
+
element_type: action.element_type ?? null,
|
|
346
|
+
coordinates: normalizedCoords,
|
|
347
|
+
data: {
|
|
348
|
+
...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
|
|
349
|
+
...(action.mode && { mode: action.mode }),
|
|
350
|
+
...(action.submit && { submit: action.submit }),
|
|
351
|
+
...(action.direction && { direction: action.direction }),
|
|
352
|
+
...(action.amount && { amount: action.amount }),
|
|
353
|
+
...(action.count && action.count > 1 && { count: action.count }),
|
|
354
|
+
...(action.duration_ms && { duration_ms: action.duration_ms }),
|
|
355
|
+
...(action.modifiers?.length && { modifiers: action.modifiers }),
|
|
356
|
+
...(action.key && { key: action.key }),
|
|
357
|
+
...(action.tab_id && { tab_id: action.tab_id }),
|
|
358
|
+
...(openedNewTab && { opened_new_tab: true }),
|
|
359
|
+
},
|
|
360
|
+
order: i,
|
|
361
|
+
});
|
|
362
|
+
}
|
|
336
363
|
actionDebugEntries.push({
|
|
337
364
|
type: actionType,
|
|
338
365
|
elementName: action.element_name ?? null,
|
|
@@ -422,6 +449,9 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
|
|
|
422
449
|
actions: actionDatas,
|
|
423
450
|
current_location: stepResponse.current_location,
|
|
424
451
|
assignment_completed: stepResponse.assignment_completed,
|
|
452
|
+
// Server reduces this to Interaction.tab when N >= 2; omit on
|
|
453
|
+
// single-tab steps to keep the payload (and DB column) null.
|
|
454
|
+
...(tabsSnapshot.length >= 2 ? { tabs: tabsSnapshot } : {}),
|
|
425
455
|
});
|
|
426
456
|
// Update history for next step
|
|
427
457
|
history.push({
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tab manager — tracks open pages with stable ids, auto-switches on popup,
|
|
3
|
+
* falls back to the opener tab when the active tab closes.
|
|
4
|
+
*
|
|
5
|
+
* Mirrors backend semantics in
|
|
6
|
+
* `ish-backend/app/simulation/computers/browser/computer.py` (TabRecord +
|
|
7
|
+
* _register_tab / _on_new_page / _set_active_tab / _handle_tab_closed).
|
|
8
|
+
*/
|
|
9
|
+
import type { BrowserContext, Page } from "playwright-core";
|
|
10
|
+
import type { LocalTabInfo } from "./types.js";
|
|
11
|
+
export declare class TabManager {
|
|
12
|
+
private readonly context;
|
|
13
|
+
private readonly tabs;
|
|
14
|
+
private activeTabId;
|
|
15
|
+
private counter;
|
|
16
|
+
constructor(context: BrowserContext, initialPage: Page);
|
|
17
|
+
activePage(): Page;
|
|
18
|
+
activeId(): string | null;
|
|
19
|
+
list(): Promise<LocalTabInfo[]>;
|
|
20
|
+
switchTab(tabId: string): Promise<void>;
|
|
21
|
+
closeTab(tabId: string): Promise<void>;
|
|
22
|
+
private registerTab;
|
|
23
|
+
private onNewPage;
|
|
24
|
+
private afterNewPageFocus;
|
|
25
|
+
private setActiveTab;
|
|
26
|
+
private handleTabClosed;
|
|
27
|
+
}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tab manager — tracks open pages with stable ids, auto-switches on popup,
|
|
3
|
+
* falls back to the opener tab when the active tab closes.
|
|
4
|
+
*
|
|
5
|
+
* Mirrors backend semantics in
|
|
6
|
+
* `ish-backend/app/simulation/computers/browser/computer.py` (TabRecord +
|
|
7
|
+
* _register_tab / _on_new_page / _set_active_tab / _handle_tab_closed).
|
|
8
|
+
*/
|
|
9
|
+
import { isDebugEnabled } from "./debug.js";
|
|
10
|
+
const MAX_OPEN_TABS = 10;
|
|
11
|
+
const NEW_TAB_LOAD_TIMEOUT_MS = 5000;
|
|
12
|
+
export class TabManager {
    /** Browser context whose "page" events announce newly opened pages. */
    context;
    /** Tracked tabs keyed by id; each record is `{ id, page, parentId, openedAt }`. */
    tabs = new Map();
    /** Id of the tab that is currently active, or null once every tab has closed. */
    activeTabId = null;
    /** Counter used to mint tab ids (`t1`, `t2`, ...). */
    counter = 0;
    /**
     * Subscribe to the context's "page" event, register `initialPage` as the
     * first tab (with no opener) and make it the active tab.
     */
    constructor(context, initialPage) {
        this.context = context;
        this.context.on("page", (page) => this.onNewPage(page));
        const id = this.registerTab(initialPage, null);
        this.activeTabId = id;
    }
    /**
     * Page of the active tab.
     * @throws Error when no tab is active or the active id is not registered.
     */
    activePage() {
        if (!this.activeTabId) {
            throw new Error("No active tab");
        }
        const rec = this.tabs.get(this.activeTabId);
        if (!rec)
            throw new Error(`Active tab ${this.activeTabId} not registered`);
        return rec.page;
    }
    /** Id of the active tab, or null when none is active. */
    activeId() {
        return this.activeTabId;
    }
    /**
     * Snapshot of the open tabs, ordered by `openedAt`.
     *
     * Records whose page is already closed are skipped. Titles are cut to 80
     * characters; a failing `page.title()` yields an empty title. `opener_id`
     * is null when the opener is no longer tracked.
     */
    async list() {
        const out = [];
        const sorted = [...this.tabs.values()].sort((a, b) => a.openedAt - b.openedAt);
        for (const rec of sorted) {
            if (rec.page.isClosed())
                continue;
            let title = "";
            try {
                title = await rec.page.title();
            }
            catch {
                // Title lookup is best-effort; keep the empty title.
            }
            out.push({
                id: rec.id,
                title: title.slice(0, 80),
                url: rec.page.url(),
                active: rec.id === this.activeTabId,
                opener_id: rec.parentId && this.tabs.has(rec.parentId) ? rec.parentId : null,
            });
        }
        return out;
    }
    /**
     * Make `tabId` the active tab.
     * @throws Error when `tabId` is not a tracked tab.
     */
    async switchTab(tabId) {
        await this.setActiveTab(tabId);
    }
    /**
     * Close the page behind `tabId`; errors from `page.close()` are ignored.
     * @throws Error when `tabId` is not a tracked tab.
     */
    async closeTab(tabId) {
        const rec = this.tabs.get(tabId);
        if (!rec) {
            throw new Error(`Unknown tab id: ${tabId}`);
        }
        await rec.page.close().catch(() => { });
        // page.on('close') drives the rest of the bookkeeping.
    }
    /**
     * Track `page` under a freshly minted id and hook its "close" event.
     * @returns The new tab id.
     */
    registerTab(page, parentId) {
        this.counter += 1;
        const id = `t${this.counter}`;
        this.tabs.set(id, { id, page, parentId, openedAt: Date.now() });
        page.on("close", () => {
            void this.handleTabClosed(id);
        });
        return id;
    }
    /**
     * Handler for the context's "page" event.
     *
     * Closes the new page immediately when MAX_OPEN_TABS tabs are already
     * tracked. Otherwise registers it with the currently active tab as its
     * opener and starts the asynchronous focus hand-over.
     */
    onNewPage(page) {
        if (this.tabs.size >= MAX_OPEN_TABS) {
            if (isDebugEnabled()) {
                console.error(`  [tabs] cap reached (${this.tabs.size}), closing extra popup`);
            }
            void page.close().catch(() => { });
            return;
        }
        const parentId = this.activeTabId;
        const tabId = this.registerTab(page, parentId);
        if (isDebugEnabled()) {
            console.error(`  [tabs] new tab ${tabId} (parent=${parentId ?? "-"}, url=${page.url()})`);
        }
        void this.afterNewPageFocus(tabId, page);
    }
    /**
     * Wait (up to NEW_TAB_LOAD_TIMEOUT_MS) for the new page to reach
     * "domcontentloaded", then activate it if it is still tracked. A failed or
     * timed-out wait is only logged in debug mode; activation still proceeds.
     */
    async afterNewPageFocus(tabId, page) {
        try {
            await page.waitForLoadState("domcontentloaded", { timeout: NEW_TAB_LOAD_TIMEOUT_MS });
        }
        catch {
            if (isDebugEnabled()) {
                console.error(`  [tabs] tab ${tabId} did not reach domcontentloaded in ${NEW_TAB_LOAD_TIMEOUT_MS}ms`);
            }
        }
        // The tab may have closed during the wait (OAuth pop-then-redirect, etc).
        if (!this.tabs.has(tabId))
            return;
        await this.setActiveTab(tabId);
    }
    /**
     * Mark `tabId` active and try to bring its page to the front.
     *
     * If the page is already closed, the tab is run through the closed-tab
     * bookkeeping instead and is not activated.
     * @throws Error when `tabId` is not a tracked tab.
     */
    async setActiveTab(tabId) {
        const rec = this.tabs.get(tabId);
        if (!rec) {
            throw new Error(`Unknown tab id: ${tabId}`);
        }
        if (rec.page.isClosed()) {
            await this.handleTabClosed(tabId);
            return;
        }
        // Assigned before the first await, so activePage() reflects the switch
        // as soon as this call has started.
        this.activeTabId = tabId;
        try {
            await rec.page.bringToFront();
        }
        catch {
            // Headless or transient — non-fatal.
        }
        if (isDebugEnabled()) {
            console.error(`  [tabs] active → ${tabId} (${rec.page.url()})`);
        }
    }
    /**
     * Forget a closed tab and, if it was the active one, pick a replacement:
     * its opener when still tracked, otherwise the most recently opened
     * survivor. With no survivors the active id becomes null.
     */
    async handleTabClosed(tabId) {
        const rec = this.tabs.get(tabId);
        if (!rec)
            return;
        this.tabs.delete(tabId);
        if (this.activeTabId !== tabId) {
            // A background tab closed — no focus change needed.
            return;
        }
        // Prefer the opener tab if it's still around. If not, pick the most
        // recently opened survivor (closest in time to the just-closed one).
        let fallbackId = rec.parentId && this.tabs.has(rec.parentId) ? rec.parentId : null;
        if (!fallbackId && this.tabs.size > 0) {
            let newest = null;
            for (const t of this.tabs.values()) {
                // `>=` rather than `>`: openedAt has millisecond resolution, so
                // tabs registered within the same millisecond tie. The map
                // iterates in registration order, so letting a later entry win
                // the tie selects the tab that was actually opened last.
                if (!newest || t.openedAt >= newest.openedAt)
                    newest = t;
            }
            fallbackId = newest?.id ?? null;
        }
        if (fallbackId) {
            await this.setActiveTab(fallbackId);
        }
        else {
            this.activeTabId = null;
            if (isDebugEnabled()) {
                console.error(`  [tabs] all tabs closed; no active page remains`);
            }
        }
    }
}
|