@ishlabs/cli 0.13.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Enum-value normalisation for CLI flags.
3
+ *
4
+ * Agents (and humans) reach for hyphen-style values on the command line —
5
+ * `--screen-format mobile-portrait`, `--kind text-file`, `--chat-mode
6
+ * tester-pair` — even when the canonical backend value is underscored (or
7
+ * vice versa for the ask-question `type` field, which is hyphenated). Rather
8
+ * than fail with a 422, each parse site funnels the raw value through
9
+ * `normalizeEnumValue` and gets back the canonical form (or `null` for a
10
+ * genuine typo, so the caller can throw a clean ValidationError).
11
+ *
12
+ * Scope: apply this only to flags whose allowed values are explicitly enumerated
13
+ * client-side. Don't blanket-rewrite arbitrary strings — that would mask real
14
+ * typos (`mobil_portrait` ≠ `mobile_portrait`) and silently coerce values the
15
+ * backend hasn't agreed to.
16
+ */
17
+ /**
18
+ * Case-insensitively match `raw` against `allowed`, treating hyphens and
19
+ * underscores as interchangeable. Returns the canonical form from `allowed`
20
+ * if matched, `null` otherwise.
21
+ */
22
+ export declare function normalizeEnumValue<T extends string>(raw: string | undefined | null, allowed: readonly T[]): T | null;
23
+ /** Interactive-iteration screen format. Canonical underscored. */
24
+ export declare const SCREEN_FORMATS: readonly ["desktop", "mobile_portrait"];
25
+ export type ScreenFormat = typeof SCREEN_FORMATS[number];
26
+ /**
27
+ * Interview-question types. Canonical is **hyphenated** for the multi-word
28
+ * values (`single-choice`, `multiple-choice`) — that's what the backend
29
+ * accepts. The normaliser folds underscored variants back to the canonical
30
+ * hyphenated form.
31
+ */
32
+ export declare const QUESTION_TYPES: readonly ["text", "slider", "likert", "single-choice", "multiple-choice", "number"];
33
+ export type QuestionType = typeof QUESTION_TYPES[number];
34
+ /**
35
+ * TesterProfile structured enums (profile-enums.v1.json). Values are
36
+ * snake_case and match the spec byte-for-byte; agents pass them verbatim
37
+ * via CLI flags.
38
+ */
39
+ export declare const EDUCATION_LEVELS: readonly ["less_than_secondary", "secondary", "some_post_secondary", "vocational_or_associate", "bachelor", "graduate"];
40
+ export type EducationLevel = typeof EDUCATION_LEVELS[number];
41
+ export declare const HOUSEHOLDS: readonly ["single", "couple_no_kids", "couple_with_kids", "single_parent", "shared_housing", "adult_with_parents", "multi_generational"];
42
+ export type Household = typeof HOUSEHOLDS[number];
43
+ export declare const LOCALE_TYPES: readonly ["urban", "suburban", "small_town", "rural"];
44
+ export type LocaleType = typeof LOCALE_TYPES[number];
45
+ export declare const INCOME_LEVELS: readonly ["lower", "lower_middle", "middle", "upper_middle", "upper", "prefer_not_to_say"];
46
+ export type IncomeLevel = typeof INCOME_LEVELS[number];
47
+ export declare const EMPLOYMENT_STATUSES: readonly ["employed_full_time", "employed_part_time", "self_employed", "unemployed_seeking", "student", "homemaker", "retired", "unable_to_work", "other"];
48
+ export type EmploymentStatus = typeof EMPLOYMENT_STATUSES[number];
49
+ /**
50
+ * Strict enum check for CLI flag values. Snake_case-only; no hyphen folding,
51
+ * because the spec values are the wire format and agents are expected to
52
+ * pass them verbatim.
53
+ */
54
+ export declare function assertEnumValue<T extends string>(raw: string, allowed: readonly T[], flagName: string): T;
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Enum-value normalisation for CLI flags.
3
+ *
4
+ * Agents (and humans) reach for hyphen-style values on the command line —
5
+ * `--screen-format mobile-portrait`, `--kind text-file`, `--chat-mode
6
+ * tester-pair` — even when the canonical backend value is underscored (or
7
+ * vice versa for the ask-question `type` field, which is hyphenated). Rather
8
+ * than fail with a 422, each parse site funnels the raw value through
9
+ * `normalizeEnumValue` and gets back the canonical form (or `null` for a
10
+ * genuine typo, so the caller can throw a clean ValidationError).
11
+ *
12
+ * Scope: apply this only to flags whose allowed values are explicitly enumerated
13
+ * client-side. Don't blanket-rewrite arbitrary strings — that would mask real
14
+ * typos (`mobil_portrait` ≠ `mobile_portrait`) and silently coerce values the
15
+ * backend hasn't agreed to.
16
+ */
17
/**
 * Resolve a raw CLI flag value to its canonical spelling in `allowed`.
 *
 * Matching ignores letter case and treats `-` and `_` as the same character
 * on both sides; surrounding whitespace in `raw` is trimmed first. The first
 * entry of `allowed` that matches wins and is returned exactly as spelled
 * there.
 *
 * @param raw - Value typed by the user; `undefined`/`null` yield `null`.
 * @param allowed - Canonical values to match against, in priority order.
 * @returns The matching canonical entry, or `null` when nothing matches.
 */
export function normalizeEnumValue(raw, allowed) {
    if (raw == null) {
        return null;
    }
    // Single folding rule shared by the input and every candidate.
    const fold = (value) => value.toLowerCase().replace(/-/g, "_");
    const wanted = fold(String(raw).trim());
    const match = allowed.find((candidate) => fold(candidate) === wanted);
    return match === undefined ? null : match;
}
32
+ /** Interactive-iteration screen format. Canonical underscored. */
33
+ export const SCREEN_FORMATS = ["desktop", "mobile_portrait"];
34
+ /**
35
+ * Interview-question types. Canonical is **hyphenated** for the multi-word
36
+ * values (`single-choice`, `multiple-choice`) — that's what the backend
37
+ * accepts. The normaliser folds underscored variants back to the canonical
38
+ * hyphenated form.
39
+ */
40
+ export const QUESTION_TYPES = [
41
+ "text",
42
+ "slider",
43
+ "likert",
44
+ "single-choice",
45
+ "multiple-choice",
46
+ "number",
47
+ ];
48
+ /**
49
+ * TesterProfile structured enums (profile-enums.v1.json). Values are
50
+ * snake_case and match the spec byte-for-byte; agents pass them verbatim
51
+ * via CLI flags.
52
+ */
53
+ export const EDUCATION_LEVELS = [
54
+ "less_than_secondary",
55
+ "secondary",
56
+ "some_post_secondary",
57
+ "vocational_or_associate",
58
+ "bachelor",
59
+ "graduate",
60
+ ];
61
+ export const HOUSEHOLDS = [
62
+ "single",
63
+ "couple_no_kids",
64
+ "couple_with_kids",
65
+ "single_parent",
66
+ "shared_housing",
67
+ "adult_with_parents",
68
+ "multi_generational",
69
+ ];
70
+ export const LOCALE_TYPES = ["urban", "suburban", "small_town", "rural"];
71
+ export const INCOME_LEVELS = [
72
+ "lower",
73
+ "lower_middle",
74
+ "middle",
75
+ "upper_middle",
76
+ "upper",
77
+ "prefer_not_to_say",
78
+ ];
79
+ export const EMPLOYMENT_STATUSES = [
80
+ "employed_full_time",
81
+ "employed_part_time",
82
+ "self_employed",
83
+ "unemployed_seeking",
84
+ "student",
85
+ "homemaker",
86
+ "retired",
87
+ "unable_to_work",
88
+ "other",
89
+ ];
90
/**
 * Strict membership check for a CLI flag value.
 *
 * The value must equal one of `allowed` exactly — no case folding and no
 * hyphen/underscore folding — because the spec values are the wire format
 * and agents are expected to pass them verbatim.
 *
 * @param raw - Value supplied for the flag.
 * @param allowed - Exact values the flag accepts.
 * @param flagName - Flag name used in the error message.
 * @returns `raw` unchanged when it is one of `allowed`.
 * @throws Error naming the flag, the rejected value and the allowed values.
 */
export function assertEnumValue(raw, allowed, flagName) {
    const isAllowed = allowed.includes(raw);
    if (isAllowed) {
        return raw;
    }
    const choices = allowed.join(", ");
    throw new Error(`Invalid ${flagName}: "${raw}". Allowed values: ${choices}.`);
}
@@ -8,10 +8,11 @@
8
8
  */
9
9
  import type { Page } from "playwright-core";
10
10
  import type { LocalStepAction, ActionResult, ContextValue, TreeData } from "./types.js";
11
+ import type { TabManager } from "./tabs.js";
11
12
  /**
12
13
  * Execute a single action on the page.
13
14
  */
14
- export declare function executeAction(page: Page, action: LocalStepAction, treeData: TreeData, contextValues: ContextValue[]): Promise<ActionResult>;
15
+ export declare function executeAction(page: Page, action: LocalStepAction, treeData: TreeData, contextValues: ContextValue[], tabs?: TabManager): Promise<ActionResult>;
15
16
  /**
16
17
  * Compare two base64 screenshots to detect visible change.
17
18
  */
@@ -8,6 +8,37 @@
8
8
  */
9
9
  import { resolveNodeToBoundingBox } from "./browser.js";
10
10
  import { isDebugEnabled } from "./debug.js";
11
// Agent-facing modifier names → Playwright keyboard modifier names.
// Mirrors backend `_PLAYWRIGHT_MODIFIERS` in
// ish-backend/app/simulation/executors/browser.py.
const PLAYWRIGHT_MODIFIERS = {
    cmd: "Meta", ctrl: "Control", shift: "Shift", alt: "Alt",
};
// Own-key lookup view of the table above. Indexing the plain object directly
// would also resolve inherited keys such as "constructor" or "__proto__" to
// truthy non-string values, which would then be passed on as modifiers.
const PLAYWRIGHT_MODIFIER_LOOKUP = new Map(Object.entries(PLAYWRIGHT_MODIFIERS));
/**
 * Translate agent-facing modifier names into Playwright key names.
 *
 * Names are matched case-insensitively. Entries that are not strings or that
 * are not one of the known modifier names are dropped rather than raising,
 * so a malformed modifier list degrades to "fewer modifiers" instead of
 * aborting the action.
 *
 * @param mods - Modifier names from the action; may be `null`/`undefined`.
 * @returns Playwright modifier key names, in input order (possibly empty).
 */
function toPWModifiers(mods) {
    if (!mods?.length)
        return [];
    const out = [];
    for (const m of mods) {
        // Action payloads are only checked to be arrays upstream; individual
        // elements may be anything, so guard before calling string methods.
        if (typeof m !== "string")
            continue;
        const mapped = PLAYWRIGHT_MODIFIER_LOOKUP.get(m.toLowerCase());
        if (mapped !== undefined)
            out.push(mapped);
    }
    return out;
}
28
+ async function withModifiers(page, mods, fn) {
29
+ if (!mods.length)
30
+ return fn();
31
+ for (const m of mods)
32
+ await page.keyboard.down(m);
33
+ try {
34
+ return await fn();
35
+ }
36
+ finally {
37
+ for (const m of [...mods].reverse()) {
38
+ await page.keyboard.up(m).catch(() => { });
39
+ }
40
+ }
41
+ }
11
42
  // --- ARIA role → Playwright role mapping ---
12
43
  const ELEMENT_TYPE_TO_ROLE = {
13
44
  BUTTON: "button",
@@ -26,7 +57,7 @@ const ELEMENT_TYPE_TO_ROLE = {
26
57
  /**
27
58
  * Execute a single action on the page.
28
59
  */
29
- export async function executeAction(page, action, treeData, contextValues) {
60
+ export async function executeAction(page, action, treeData, contextValues, tabs) {
30
61
  try {
31
62
  // Intercept "back button" taps — the LLM often tries to tap the browser
32
63
  // back button which doesn't exist in the viewport. Convert to page.goBack().
@@ -71,6 +102,18 @@ export async function executeAction(page, action, treeData, contextValues) {
71
102
  case "think":
72
103
  // No-op: model is reasoning without acting
73
104
  break;
105
+ case "keyboard_shortcut":
106
+ await executeKeyboardShortcut(page, action);
107
+ break;
108
+ case "switch_tab":
109
+ case "close_tab":
110
+ if (tabs && action.tab_id) {
111
+ if (action.type === "switch_tab")
112
+ await tabs.switchTab(action.tab_id);
113
+ else
114
+ await tabs.closeTab(action.tab_id);
115
+ }
116
+ break;
74
117
  case "pinch_zoom":
75
118
  case "rotate_device":
76
119
  // Not supported in desktop browser
@@ -178,9 +221,12 @@ async function executeTap(page, action, treeData) {
178
221
  const count = action.count ?? 1;
179
222
  const coords = await resolveElement(page, action, treeData);
180
223
  if (coords) {
181
- for (let i = 0; i < count; i++) {
182
- await page.mouse.click(coords.x, coords.y);
183
- }
224
+ const pwMods = toPWModifiers(action.modifiers);
225
+ await withModifiers(page, pwMods, async () => {
226
+ for (let i = 0; i < count; i++) {
227
+ await page.mouse.click(coords.x, coords.y);
228
+ }
229
+ });
184
230
  return coords;
185
231
  }
186
232
  else {
@@ -295,22 +341,40 @@ async function executeLongPress(page, action, treeData) {
295
341
  const coords = await resolveElement(page, action, treeData);
296
342
  if (!coords)
297
343
  throw new Error(`Cannot locate element for long press: ${action.element_name ?? "unknown"}`);
298
- await page.mouse.move(coords.x, coords.y);
299
- await page.mouse.down();
300
- await page.waitForTimeout(action.duration_ms ?? 500);
301
- await page.mouse.up();
344
+ const pwMods = toPWModifiers(action.modifiers);
345
+ await withModifiers(page, pwMods, async () => {
346
+ await page.mouse.move(coords.x, coords.y);
347
+ await page.mouse.down();
348
+ await page.waitForTimeout(action.duration_ms ?? 500);
349
+ await page.mouse.up();
350
+ });
302
351
  return coords;
303
352
  }
304
353
  async function executeDoubleTap(page, action, treeData) {
305
354
  const coords = await resolveElement(page, action, treeData);
306
355
  if (coords) {
307
- await page.mouse.dblclick(coords.x, coords.y);
356
+ const pwMods = toPWModifiers(action.modifiers);
357
+ await withModifiers(page, pwMods, async () => {
358
+ await page.mouse.dblclick(coords.x, coords.y);
359
+ });
308
360
  return coords;
309
361
  }
310
362
  else {
311
363
  throw new Error(`Cannot locate element for double tap: ${action.element_name ?? "unknown"}`);
312
364
  }
313
365
  }
366
+ /**
367
+ * Press a key combination on the currently focused element.
368
+ * Mirrors backend BrowserActionExecutor.execute_keyboard_shortcut.
369
+ */
370
+ async function executeKeyboardShortcut(page, action) {
371
+ if (!action.key) {
372
+ throw new Error("keyboard_shortcut missing key");
373
+ }
374
+ const pwMods = toPWModifiers(action.modifiers);
375
+ const combo = pwMods.length ? `${pwMods.join("+")}+${action.key}` : action.key;
376
+ await page.keyboard.press(combo);
377
+ }
314
378
  // --- Helpers ---
315
379
  /**
316
380
  * Resolve the actual text to type from an action, handling var/secret value types.
@@ -343,11 +407,12 @@ export function detectNoVisibleChange(before, after) {
343
407
  */
344
408
  export function describeAction(action) {
345
409
  const element = action.element_name || "element";
410
+ const modSuffix = action.modifiers?.length ? ` [${action.modifiers.join("+")}]` : "";
346
411
  switch (action.type) {
347
412
  case "tap":
348
413
  return action.count && action.count > 1
349
- ? `tap on '${element}' x${action.count}`
350
- : `tap on '${element}'`;
414
+ ? `tap on '${element}' x${action.count}${modSuffix}`
415
+ : `tap on '${element}'${modSuffix}`;
351
416
  case "text_input": {
352
417
  const val = action.value_type === "secret" ? "***" : `"${(action.value ?? "").slice(0, 30)}"`;
353
418
  const modeStr = action.mode ? ` (${action.mode}${action.submit ? ", submit" : ""})` : "";
@@ -364,15 +429,25 @@ export function describeAction(action) {
364
429
  case "navigate_back":
365
430
  return "navigate back";
366
431
  case "long_press":
367
- return `long_press on '${element}'`;
432
+ return `long_press on '${element}'${modSuffix}`;
368
433
  case "double_tap":
369
- return `double_tap on '${element}'`;
434
+ return `double_tap on '${element}'${modSuffix}`;
370
435
  case "drag":
371
436
  return `drag '${element}'`;
372
437
  case "think":
373
438
  return `think: "${(action.thoughts ?? "").slice(0, 50)}"`;
374
439
  case "pull_to_refresh":
375
440
  return "pull_to_refresh";
441
+ case "keyboard_shortcut": {
442
+ const combo = action.modifiers?.length
443
+ ? `${action.modifiers.join("+")}+${action.key ?? "?"}`
444
+ : (action.key ?? "?");
445
+ return `keyboard_shortcut '${combo}'`;
446
+ }
447
+ case "switch_tab":
448
+ return `switch_tab '${action.tab_id ?? "?"}'`;
449
+ case "close_tab":
450
+ return `close_tab '${action.tab_id ?? "?"}'`;
376
451
  default:
377
452
  return `${action.type} on '${element}'`;
378
453
  }
@@ -7,6 +7,7 @@
7
7
  import { launchBrowser, launchSharedBrowser, createTab, captureObservation, takeScreenshot, takeScreenshotJpeg, navigateWithRetry, closeBrowser } from "./browser.js";
8
8
  import { uploadScreenshot } from "./upload.js";
9
9
  import { executeAction, detectNoVisibleChange, describeAction } from "./actions.js";
10
+ import { TabManager } from "./tabs.js";
10
11
  import { enableDebug, isDebugEnabled, debugObservation, debugRawResponse, debugNormalizedActions, debugActionExecution, debugForwards, debugStepSummary, debugRecord, } from "./debug.js";
11
12
  /**
12
13
  * Convert a raw action (from either resolved_actions or output.action.actions)
@@ -32,6 +33,9 @@ function flattenAction(raw, nodeId = null, nodeDescription = null) {
32
33
  count: a.count ?? null,
33
34
  duration_ms: a.duration_ms ?? null,
34
35
  thoughts: a.thoughts ?? null,
36
+ modifiers: Array.isArray(a.modifiers) ? a.modifiers : null,
37
+ key: a.key ?? null,
38
+ tab_id: a.tab_id ?? null,
35
39
  };
36
40
  }
37
41
  /**
@@ -199,7 +203,10 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
199
203
  const browserSession = sharedBrowser
200
204
  ? await createTab(sharedBrowser, browserOpts)
201
205
  : await launchBrowser(browserOpts);
202
- const { page } = browserSession;
206
+ // Active page can swap when a popup auto-focuses or the LLM issues
207
+ // switch_tab/close_tab. TabManager wires the context popup listener.
208
+ const tabs = new TabManager(browserSession.context, browserSession.page);
209
+ let page = tabs.activePage();
203
210
  const history = [];
204
211
  const interactions = [];
205
212
  const debugSteps = [];
@@ -218,7 +225,8 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
218
225
  let step = 0;
219
226
  let assignmentCompleted = false;
220
227
  while (step < maxSteps && !assignmentCompleted && !isCancelled()) {
221
- // OBSERVE
228
+ // OBSERVE — refresh active page in case a popup or switch_tab changed it
229
+ page = tabs.activePage();
222
230
  const obs = await captureObservation(page);
223
231
  const lastTreeData = obs.treeData;
224
232
  const currentScreenshot = obs.screenshot;
@@ -236,6 +244,9 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
236
244
  if (forwards.length > 0)
237
245
  debugForwards(forwards);
238
246
  const viewportSize = page.viewportSize() ?? viewport;
247
+ // Snapshot open tabs so the backend can prompt the LLM with tab ids
248
+ // (used by switch_tab/close_tab and to disambiguate cmd+click results).
249
+ const tabsSnapshot = await tabs.list();
239
250
  // REASON (remote)
240
251
  let stepResponse;
241
252
  try {
@@ -258,6 +269,7 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
258
269
  agent_model: session.agent_model,
259
270
  dom_model: session.dom_model,
260
271
  llm_provider: session.llm_provider,
272
+ tabs: tabsSnapshot,
261
273
  };
262
274
  stepResponse = normalizeStepResponse(await client.localSimStep(stepReqBody));
263
275
  }
@@ -285,6 +297,7 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
285
297
  agent_model: session.agent_model,
286
298
  dom_model: session.dom_model,
287
299
  llm_provider: session.llm_provider,
300
+ tabs: tabsSnapshot,
288
301
  };
289
302
  stepResponse = normalizeStepResponse(await client.localSimStep(stepReqBody));
290
303
  }
@@ -304,10 +317,17 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
304
317
  for (let i = 0; i < stepResponse.actions.length; i++) {
305
318
  if (isCancelled())
306
319
  break;
320
+ // Pick up popup auto-switch / explicit tab switch from prior actions.
321
+ page = tabs.activePage();
307
322
  const action = stepResponse.actions[i];
308
- const result = await executeAction(page, action, lastTreeData, session.context_values);
323
+ const tabsBefore = (await tabs.list()).length;
324
+ const result = await executeAction(page, action, lastTreeData, session.context_values, tabs);
309
325
  const desc = describeAction(action);
310
326
  debugActionExecution(i, action, result, action.node_id ? "cdp" : "playwright");
327
+ // The action may have flipped the active tab — re-read.
328
+ page = tabs.activePage();
329
+ const tabsAfter = (await tabs.list()).length;
330
+ const openedNewTab = action.type === "tap" && tabsAfter > tabsBefore;
311
331
  let normalizedCoords = null;
312
332
  if (result.coordinates) {
313
333
  const vp = page.viewportSize() ?? viewport;
@@ -317,22 +337,29 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
317
337
  };
318
338
  }
319
339
  const actionType = action.type || "unknown";
320
- actionDatas.push({
321
- action_type: actionType,
322
- element_label: action.element_name ?? null,
323
- element_type: action.element_type ?? null,
324
- coordinates: normalizedCoords,
325
- data: {
326
- ...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
327
- ...(action.mode && { mode: action.mode }),
328
- ...(action.submit && { submit: action.submit }),
329
- ...(action.direction && { direction: action.direction }),
330
- ...(action.amount && { amount: action.amount }),
331
- ...(action.count && action.count > 1 && { count: action.count }),
332
- ...(action.duration_ms && { duration_ms: action.duration_ms }),
333
- },
334
- order: i,
335
- });
340
+ const INTERNAL_ACTIONS = new Set(["think"]);
341
+ if (!INTERNAL_ACTIONS.has(actionType)) {
342
+ actionDatas.push({
343
+ action_type: actionType,
344
+ element_label: action.element_name ?? null,
345
+ element_type: action.element_type ?? null,
346
+ coordinates: normalizedCoords,
347
+ data: {
348
+ ...(action.value !== undefined && action.value !== null && { value: action.value_type === "secret" ? "***" : action.value }),
349
+ ...(action.mode && { mode: action.mode }),
350
+ ...(action.submit && { submit: action.submit }),
351
+ ...(action.direction && { direction: action.direction }),
352
+ ...(action.amount && { amount: action.amount }),
353
+ ...(action.count && action.count > 1 && { count: action.count }),
354
+ ...(action.duration_ms && { duration_ms: action.duration_ms }),
355
+ ...(action.modifiers?.length && { modifiers: action.modifiers }),
356
+ ...(action.key && { key: action.key }),
357
+ ...(action.tab_id && { tab_id: action.tab_id }),
358
+ ...(openedNewTab && { opened_new_tab: true }),
359
+ },
360
+ order: i,
361
+ });
362
+ }
336
363
  actionDebugEntries.push({
337
364
  type: actionType,
338
365
  elementName: action.element_name ?? null,
@@ -422,6 +449,9 @@ async function runSingleSimulation(client, testerId, testerName, opts, log, isCa
422
449
  actions: actionDatas,
423
450
  current_location: stepResponse.current_location,
424
451
  assignment_completed: stepResponse.assignment_completed,
452
+ // Server reduces this to Interaction.tab when N >= 2; omit on
453
+ // single-tab steps to keep the payload (and DB column) null.
454
+ ...(tabsSnapshot.length >= 2 ? { tabs: tabsSnapshot } : {}),
425
455
  });
426
456
  // Update history for next step
427
457
  history.push({
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Tab manager — tracks open pages with stable ids, auto-switches on popup,
3
+ * falls back to the opener tab when the active tab closes.
4
+ *
5
+ * Mirrors backend semantics in
6
+ * `ish-backend/app/simulation/computers/browser/computer.py` (TabRecord +
7
+ * _register_tab / _on_new_page / _set_active_tab / _handle_tab_closed).
8
+ */
9
+ import type { BrowserContext, Page } from "playwright-core";
10
+ import type { LocalTabInfo } from "./types.js";
11
+ export declare class TabManager {
12
+ private readonly context;
13
+ private readonly tabs;
14
+ private activeTabId;
15
+ private counter;
16
+ constructor(context: BrowserContext, initialPage: Page);
17
+ activePage(): Page;
18
+ activeId(): string | null;
19
+ list(): Promise<LocalTabInfo[]>;
20
+ switchTab(tabId: string): Promise<void>;
21
+ closeTab(tabId: string): Promise<void>;
22
+ private registerTab;
23
+ private onNewPage;
24
+ private afterNewPageFocus;
25
+ private setActiveTab;
26
+ private handleTabClosed;
27
+ }
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Tab manager — tracks open pages with stable ids, auto-switches on popup,
3
+ * falls back to the opener tab when the active tab closes.
4
+ *
5
+ * Mirrors backend semantics in
6
+ * `ish-backend/app/simulation/computers/browser/computer.py` (TabRecord +
7
+ * _register_tab / _on_new_page / _set_active_tab / _handle_tab_closed).
8
+ */
9
+ import { isDebugEnabled } from "./debug.js";
10
+ const MAX_OPEN_TABS = 10;
11
+ const NEW_TAB_LOAD_TIMEOUT_MS = 5000;
12
+ export class TabManager {
13
+ context;
14
+ tabs = new Map();
15
+ activeTabId = null;
16
+ counter = 0;
17
+ constructor(context, initialPage) {
18
+ this.context = context;
19
+ this.context.on("page", (page) => this.onNewPage(page));
20
+ const id = this.registerTab(initialPage, null);
21
+ this.activeTabId = id;
22
+ }
23
+ activePage() {
24
+ if (!this.activeTabId) {
25
+ throw new Error("No active tab");
26
+ }
27
+ const rec = this.tabs.get(this.activeTabId);
28
+ if (!rec)
29
+ throw new Error(`Active tab ${this.activeTabId} not registered`);
30
+ return rec.page;
31
+ }
32
+ activeId() {
33
+ return this.activeTabId;
34
+ }
35
+ async list() {
36
+ const out = [];
37
+ const sorted = [...this.tabs.values()].sort((a, b) => a.openedAt - b.openedAt);
38
+ for (const rec of sorted) {
39
+ if (rec.page.isClosed())
40
+ continue;
41
+ let title = "";
42
+ try {
43
+ title = await rec.page.title();
44
+ }
45
+ catch {
46
+ // ignore
47
+ }
48
+ out.push({
49
+ id: rec.id,
50
+ title: title.slice(0, 80),
51
+ url: rec.page.url(),
52
+ active: rec.id === this.activeTabId,
53
+ opener_id: rec.parentId && this.tabs.has(rec.parentId) ? rec.parentId : null,
54
+ });
55
+ }
56
+ return out;
57
+ }
58
+ async switchTab(tabId) {
59
+ await this.setActiveTab(tabId);
60
+ }
61
+ async closeTab(tabId) {
62
+ const rec = this.tabs.get(tabId);
63
+ if (!rec) {
64
+ throw new Error(`Unknown tab id: ${tabId}`);
65
+ }
66
+ await rec.page.close().catch(() => { });
67
+ // page.on('close') drives the rest of the bookkeeping.
68
+ }
69
+ registerTab(page, parentId) {
70
+ this.counter += 1;
71
+ const id = `t${this.counter}`;
72
+ this.tabs.set(id, { id, page, parentId, openedAt: Date.now() });
73
+ page.on("close", () => {
74
+ void this.handleTabClosed(id);
75
+ });
76
+ return id;
77
+ }
78
+ onNewPage(page) {
79
+ if (this.tabs.size >= MAX_OPEN_TABS) {
80
+ if (isDebugEnabled()) {
81
+ console.error(` [tabs] cap reached (${this.tabs.size}), closing extra popup`);
82
+ }
83
+ void page.close().catch(() => { });
84
+ return;
85
+ }
86
+ const parentId = this.activeTabId;
87
+ const tabId = this.registerTab(page, parentId);
88
+ if (isDebugEnabled()) {
89
+ console.error(` [tabs] new tab ${tabId} (parent=${parentId ?? "-"}, url=${page.url()})`);
90
+ }
91
+ void this.afterNewPageFocus(tabId, page);
92
+ }
93
+ async afterNewPageFocus(tabId, page) {
94
+ try {
95
+ await page.waitForLoadState("domcontentloaded", { timeout: NEW_TAB_LOAD_TIMEOUT_MS });
96
+ }
97
+ catch {
98
+ if (isDebugEnabled()) {
99
+ console.error(` [tabs] tab ${tabId} did not reach domcontentloaded in ${NEW_TAB_LOAD_TIMEOUT_MS}ms`);
100
+ }
101
+ }
102
+ // The tab may have closed during the wait (OAuth pop-then-redirect, etc).
103
+ if (!this.tabs.has(tabId))
104
+ return;
105
+ await this.setActiveTab(tabId);
106
+ }
107
+ async setActiveTab(tabId) {
108
+ const rec = this.tabs.get(tabId);
109
+ if (!rec) {
110
+ throw new Error(`Unknown tab id: ${tabId}`);
111
+ }
112
+ if (rec.page.isClosed()) {
113
+ await this.handleTabClosed(tabId);
114
+ return;
115
+ }
116
+ this.activeTabId = tabId;
117
+ try {
118
+ await rec.page.bringToFront();
119
+ }
120
+ catch {
121
+ // Headless or transient — non-fatal.
122
+ }
123
+ if (isDebugEnabled()) {
124
+ console.error(` [tabs] active → ${tabId} (${rec.page.url()})`);
125
+ }
126
+ }
127
+ async handleTabClosed(tabId) {
128
+ const rec = this.tabs.get(tabId);
129
+ if (!rec)
130
+ return;
131
+ this.tabs.delete(tabId);
132
+ if (this.activeTabId !== tabId) {
133
+ // A background tab closed — no focus change needed.
134
+ return;
135
+ }
136
+ // Prefer the opener tab if it's still around. If not, pick the most
137
+ // recently opened survivor (closest in time to the just-closed one).
138
+ let fallbackId = rec.parentId && this.tabs.has(rec.parentId) ? rec.parentId : null;
139
+ if (!fallbackId && this.tabs.size > 0) {
140
+ let newest = null;
141
+ for (const t of this.tabs.values()) {
142
+ if (!newest || t.openedAt > newest.openedAt)
143
+ newest = t;
144
+ }
145
+ fallbackId = newest?.id ?? null;
146
+ }
147
+ if (fallbackId) {
148
+ await this.setActiveTab(fallbackId);
149
+ }
150
+ else {
151
+ this.activeTabId = null;
152
+ if (isDebugEnabled()) {
153
+ console.error(` [tabs] all tabs closed; no active page remains`);
154
+ }
155
+ }
156
+ }
157
+ }