assistme 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -493,11 +493,20 @@ URL: ${info.url}`;
493
493
  }
494
494
  async goBack() {
495
495
  this.ensureConnected();
496
- await this.send("Page.navigateToHistoryEntry", {
497
- entryId: -1
498
- }).catch(() => {
499
- });
500
- await this.evaluate("window.history.back()");
496
+ try {
497
+ const history = await this.send("Page.getNavigationHistory");
498
+ const idx = history.currentIndex ?? 0;
499
+ const entries = history.entries ?? [];
500
+ if (idx > 0 && entries[idx - 1]) {
501
+ await this.send("Page.navigateToHistoryEntry", {
502
+ entryId: entries[idx - 1].id
503
+ });
504
+ } else {
505
+ await this.evaluate("window.history.back()");
506
+ }
507
+ } catch {
508
+ await this.evaluate("window.history.back()");
509
+ }
501
510
  await this.waitForLoad();
502
511
  const info = await this.getPageInfo();
503
512
  return `Went back to: ${info.title}`;
@@ -665,29 +674,80 @@ URL: ${info.url}`;
665
674
  Tab: { keyCode: 9, code: "Tab" },
666
675
  Escape: { keyCode: 27, code: "Escape" },
667
676
  Backspace: { keyCode: 8, code: "Backspace" },
677
+ Delete: { keyCode: 46, code: "Delete" },
668
678
  ArrowDown: { keyCode: 40, code: "ArrowDown" },
669
- ArrowUp: { keyCode: 38, code: "ArrowUp" }
679
+ ArrowUp: { keyCode: 38, code: "ArrowUp" },
680
+ ArrowLeft: { keyCode: 37, code: "ArrowLeft" },
681
+ ArrowRight: { keyCode: 39, code: "ArrowRight" },
682
+ Home: { keyCode: 36, code: "Home" },
683
+ End: { keyCode: 35, code: "End" },
684
+ Space: { keyCode: 32, code: "Space" }
685
+ };
686
+ const modifierMap = {
687
+ Alt: 1,
688
+ Control: 2,
689
+ Meta: 4,
690
+ Shift: 8
670
691
  };
671
- const mapped = keyMap[key];
692
+ const parts = key.split("+");
693
+ let modifiers = 0;
694
+ let actualKey = parts[parts.length - 1];
695
+ for (let i = 0; i < parts.length - 1; i++) {
696
+ const mod = modifierMap[parts[i]];
697
+ if (mod) modifiers |= mod;
698
+ }
699
+ const mapped = keyMap[actualKey];
672
700
  if (mapped) {
673
701
  await this.send("Input.dispatchKeyEvent", {
674
702
  type: "keyDown",
675
- key,
703
+ key: actualKey,
676
704
  code: mapped.code,
677
705
  windowsVirtualKeyCode: mapped.keyCode,
678
- nativeVirtualKeyCode: mapped.keyCode
706
+ nativeVirtualKeyCode: mapped.keyCode,
707
+ modifiers
679
708
  });
680
709
  await this.send("Input.dispatchKeyEvent", {
681
710
  type: "keyUp",
682
- key,
711
+ key: actualKey,
683
712
  code: mapped.code,
684
713
  windowsVirtualKeyCode: mapped.keyCode,
685
- nativeVirtualKeyCode: mapped.keyCode
714
+ nativeVirtualKeyCode: mapped.keyCode,
715
+ modifiers
716
+ });
717
+ } else if (actualKey.length === 1) {
718
+ const code = `Key${actualKey.toUpperCase()}`;
719
+ const keyCode = actualKey.toUpperCase().charCodeAt(0);
720
+ await this.send("Input.dispatchKeyEvent", {
721
+ type: "keyDown",
722
+ key: actualKey,
723
+ code,
724
+ windowsVirtualKeyCode: keyCode,
725
+ nativeVirtualKeyCode: keyCode,
726
+ modifiers
727
+ });
728
+ if (!modifiers) {
729
+ await this.send("Input.dispatchKeyEvent", {
730
+ type: "char",
731
+ text: actualKey,
732
+ modifiers
733
+ });
734
+ }
735
+ await this.send("Input.dispatchKeyEvent", {
736
+ type: "keyUp",
737
+ key: actualKey,
738
+ code,
739
+ modifiers
686
740
  });
687
741
  } else {
688
742
  await this.send("Input.dispatchKeyEvent", {
689
- type: "char",
690
- text: key
743
+ type: "keyDown",
744
+ key: actualKey,
745
+ modifiers
746
+ });
747
+ await this.send("Input.dispatchKeyEvent", {
748
+ type: "keyUp",
749
+ key: actualKey,
750
+ modifiers
691
751
  });
692
752
  }
693
753
  return `Pressed key: ${key}`;
@@ -1061,12 +1121,17 @@ Refs:
1061
1121
  */
1062
1122
  async clickRef(refId) {
1063
1123
  this.ensureConnected();
1124
+ const ref = this.refCache.get(refId);
1125
+ const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1064
1126
  const maxRetries = 3;
1065
1127
  let lastError = "";
1066
1128
  for (let attempt = 0; attempt < maxRetries; attempt++) {
1067
1129
  const resolved = await this.resolveRef(refId);
1068
1130
  if (!resolved) {
1069
- return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
1131
+ return {
1132
+ success: false,
1133
+ message: `Ref ${refLabel} not found. Take a new snapshot with browser_snapshot.`
1134
+ };
1070
1135
  }
1071
1136
  if (resolved.error) {
1072
1137
  lastError = resolved.error;
@@ -1074,8 +1139,7 @@ Refs:
1074
1139
  await new Promise((r) => setTimeout(r, 500));
1075
1140
  continue;
1076
1141
  }
1077
- const ref3 = this.refCache.get(refId);
1078
- return `Cannot click [${refId}] ${ref3?.role || ""} "${ref3?.name || ""}": ${lastError}`;
1142
+ return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
1079
1143
  }
1080
1144
  if (attempt === 0) {
1081
1145
  await new Promise((r) => setTimeout(r, 50));
@@ -1105,11 +1169,9 @@ Refs:
1105
1169
  clickCount: 1
1106
1170
  });
1107
1171
  await new Promise((r) => setTimeout(r, 300));
1108
- const ref2 = this.refCache.get(refId);
1109
- return `Clicked [${refId}] ${ref2?.role || ""} "${ref2?.name || ""}"`;
1172
+ return { success: true, message: `Clicked ${refLabel}` };
1110
1173
  }
1111
- const ref = this.refCache.get(refId);
1112
- return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
1174
+ return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
1113
1175
  }
1114
1176
  /**
1115
1177
  * Type text into an element by ref using CDP Input events.
@@ -1118,37 +1180,49 @@ Refs:
1118
1180
  */
1119
1181
  async typeRef(refId, text) {
1120
1182
  this.ensureConnected();
1183
+ const ref = this.refCache.get(refId);
1184
+ const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1121
1185
  const clickResult = await this.clickRef(refId);
1122
- if (clickResult.includes("not found")) return clickResult;
1186
+ if (!clickResult.success) return clickResult;
1123
1187
  await new Promise((r) => setTimeout(r, 100));
1124
- const modifier = platform() === "darwin" ? 4 : 2;
1125
- await this.send("Input.dispatchKeyEvent", {
1126
- type: "keyDown",
1127
- modifiers: modifier,
1128
- key: "a",
1129
- code: "KeyA",
1130
- windowsVirtualKeyCode: 65
1131
- });
1132
- await this.send("Input.dispatchKeyEvent", {
1133
- type: "keyUp",
1134
- key: "a",
1135
- code: "KeyA"
1136
- });
1137
- await this.send("Input.dispatchKeyEvent", {
1138
- type: "keyDown",
1139
- key: "Backspace",
1140
- code: "Backspace",
1141
- windowsVirtualKeyCode: 8
1142
- });
1143
- await this.send("Input.dispatchKeyEvent", {
1144
- type: "keyUp",
1145
- key: "Backspace",
1146
- code: "Backspace"
1188
+ const selectAllKey = platform() === "darwin" ? "Meta+a" : "Control+a";
1189
+ await this.pressKey(selectAllKey);
1190
+ await new Promise((r) => setTimeout(r, 50));
1191
+ await this.pressKey("Backspace");
1192
+ await new Promise((r) => setTimeout(r, 50));
1193
+ const cleared = await this.send("Runtime.evaluate", {
1194
+ expression: `
1195
+ (function() {
1196
+ var el = document.querySelector('[data-assistme-ref="${refId}"]');
1197
+ if (!el) return 'no_element';
1198
+ if (el.value !== undefined && el.value !== '') {
1199
+ // Ctrl+A didn't work (some frameworks intercept it) \u2014 clear via JS
1200
+ var setter = Object.getOwnPropertyDescriptor(
1201
+ window.HTMLInputElement.prototype, 'value'
1202
+ )?.set || Object.getOwnPropertyDescriptor(
1203
+ window.HTMLTextAreaElement.prototype, 'value'
1204
+ )?.set;
1205
+ if (setter) setter.call(el, '');
1206
+ else el.value = '';
1207
+ el.dispatchEvent(new Event('input', { bubbles: true }));
1208
+ el.dispatchEvent(new Event('change', { bubbles: true }));
1209
+ return 'js_cleared';
1210
+ }
1211
+ return 'ok';
1212
+ })()
1213
+ `,
1214
+ returnByValue: true
1147
1215
  });
1216
+ const clearStatus = cleared.result?.value || "ok";
1217
+ if (clearStatus === "no_element") {
1218
+ return {
1219
+ success: false,
1220
+ message: `Ref ${refLabel} not found after click. Take a new snapshot.`
1221
+ };
1222
+ }
1148
1223
  await this.send("Input.insertText", { text });
1149
1224
  await new Promise((r) => setTimeout(r, 100));
1150
- const ref = this.refCache.get(refId);
1151
- return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1225
+ return { success: true, message: `Typed "${text}" into ${refLabel}` };
1152
1226
  }
1153
1227
  /**
1154
1228
  * Select a dropdown option by ref. Delegates to selectOption with the
@@ -1159,13 +1233,16 @@ Refs:
1159
1233
  this.ensureConnected();
1160
1234
  const cached = this.refCache.get(refId);
1161
1235
  if (!cached) {
1162
- return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
1236
+ return {
1237
+ success: false,
1238
+ message: `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`
1239
+ };
1163
1240
  }
1241
+ const refLabel = `[${refId}] ${cached.role} "${cached.name}"`;
1164
1242
  const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);
1165
- return result.replace(
1166
- /\[data-assistme-ref="\d+"\]/,
1167
- `[${refId}] ${cached.role} "${cached.name}"`
1168
- );
1243
+ const message = result.replace(/\[data-assistme-ref="\d+"\]/, refLabel);
1244
+ const success = !result.includes("not found");
1245
+ return { success, message };
1169
1246
  }
1170
1247
  // ── Action Pipeline ───────────────────────────────────────────────
1171
1248
  /**
@@ -1183,18 +1260,24 @@ Refs:
1183
1260
  let success = true;
1184
1261
  try {
1185
1262
  switch (spec.action) {
1186
- case "click":
1187
- result = await this.clickRef(spec.ref);
1188
- success = !result.includes("not found");
1263
+ case "click": {
1264
+ const r = await this.clickRef(spec.ref);
1265
+ result = r.message;
1266
+ success = r.success;
1189
1267
  break;
1190
- case "type":
1191
- result = await this.typeRef(spec.ref, spec.text);
1192
- success = !result.includes("not found");
1268
+ }
1269
+ case "type": {
1270
+ const r = await this.typeRef(spec.ref, spec.text);
1271
+ result = r.message;
1272
+ success = r.success;
1193
1273
  break;
1194
- case "select":
1195
- result = await this.selectRef(spec.ref, spec.option);
1196
- success = !result.includes("not found");
1274
+ }
1275
+ case "select": {
1276
+ const r = await this.selectRef(spec.ref, spec.option);
1277
+ result = r.message;
1278
+ success = r.success;
1197
1279
  break;
1280
+ }
1198
1281
  case "press":
1199
1282
  result = await this.pressKey(spec.key);
1200
1283
  break;
@@ -1269,15 +1352,24 @@ Refs:
1269
1352
  // Strategy 2: Custom dropdown \u2014 find the trigger element
1270
1353
  var trigger = selectEl;
1271
1354
  if (!trigger) {
1272
- // Try finding by label/placeholder text
1273
- var allEls = document.querySelectorAll('*');
1274
- for (var j = 0; j < allEls.length; j++) {
1275
- var el = allEls[j];
1355
+ // Try finding by aria-label first (fast, indexed)
1356
+ trigger = document.querySelector('[aria-label="' + sel.replace(/"/g, '\\"') + '"]');
1357
+ }
1358
+ if (!trigger) {
1359
+ // Try finding by label/placeholder text in likely dropdown elements
1360
+ var dropdownCandidates = document.querySelectorAll(
1361
+ 'button, [role="combobox"], [role="listbox"], [role="button"], ' +
1362
+ 'select, input, .MuiSelect-root, .MuiInput-root, ' +
1363
+ '[class*="select"], [class*="dropdown"], [class*="picker"]'
1364
+ );
1365
+ for (var j = 0; j < dropdownCandidates.length; j++) {
1366
+ var el = dropdownCandidates[j];
1276
1367
  var ownText = Array.from(el.childNodes)
1277
1368
  .filter(function(n) { return n.nodeType === 3; })
1278
1369
  .map(function(n) { return n.textContent.trim(); })
1279
1370
  .join('');
1280
- if (ownText === sel || el.getAttribute('aria-label') === sel) {
1371
+ if (ownText === sel || el.getAttribute('aria-label') === sel ||
1372
+ el.getAttribute('placeholder') === sel) {
1281
1373
  trigger = el;
1282
1374
  break;
1283
1375
  }
@@ -1314,10 +1406,13 @@ Refs:
1314
1406
  }
1315
1407
  }
1316
1408
 
1317
- // Broader search: any visible element with exact text match
1318
- var everything = document.querySelectorAll('*');
1319
- for (var m = 0; m < everything.length; m++) {
1320
- var candidate = everything[m];
1409
+ // Broader search: visible leaf elements in interactive containers
1410
+ var broadCandidates = document.querySelectorAll(
1411
+ 'li, span, div, a, button, label, [role="option"], [role="menuitem"], ' +
1412
+ '[role="menuitemradio"], [role="menuitemcheckbox"], [data-value]'
1413
+ );
1414
+ for (var m = 0; m < broadCandidates.length; m++) {
1415
+ var candidate = broadCandidates[m];
1321
1416
  if (candidate.textContent && candidate.textContent.trim() === optText &&
1322
1417
  candidate.offsetParent !== null && candidate.children.length === 0) {
1323
1418
  candidate.click();
@@ -1390,6 +1485,7 @@ Refs:
1390
1485
  // ── Helpers ─────────────────────────────────────────────────────
1391
1486
  async waitForLoad(timeoutMs = 8e3) {
1392
1487
  const start = Date.now();
1488
+ let sawInteractive = false;
1393
1489
  while (Date.now() - start < timeoutMs) {
1394
1490
  try {
1395
1491
  const result = await this.send("Runtime.evaluate", {
@@ -1397,67 +1493,22 @@ Refs:
1397
1493
  returnByValue: true
1398
1494
  });
1399
1495
  const state = result.result?.value;
1400
- if (state === "complete" || state === "interactive") {
1401
- await new Promise((r) => setTimeout(r, 500));
1496
+ if (state === "complete") {
1497
+ await new Promise((r) => setTimeout(r, 300));
1402
1498
  return;
1403
1499
  }
1500
+ if (state === "interactive") {
1501
+ if (!sawInteractive) {
1502
+ sawInteractive = true;
1503
+ }
1504
+ }
1404
1505
  } catch {
1405
1506
  }
1406
1507
  await new Promise((r) => setTimeout(r, 300));
1407
1508
  }
1408
- }
1409
- /**
1410
- * Find interactive elements on the page for the AI to understand what's clickable
1411
- */
1412
- async getInteractiveElements() {
1413
- this.ensureConnected();
1414
- const result = await this.send("Runtime.evaluate", {
1415
- expression: `
1416
- (function() {
1417
- const elements = [];
1418
- const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
1419
- const all = document.querySelectorAll(selectors);
1420
- for (let i = 0; i < all.length && elements.length < 50; i++) {
1421
- const el = all[i];
1422
- const rect = el.getBoundingClientRect();
1423
- if (rect.width === 0 || rect.height === 0) continue; // Skip hidden
1424
-
1425
- // Build a reliable CSS selector
1426
- let selector;
1427
- if (el.id) {
1428
- selector = '#' + CSS.escape(el.id);
1429
- } else if (el.getAttribute('data-testid')) {
1430
- selector = '[data-testid="' + el.getAttribute('data-testid') + '"]';
1431
- } else {
1432
- // Build a path-based selector: find nth-of-type among siblings
1433
- const tag = el.tagName.toLowerCase();
1434
- const parent = el.parentElement;
1435
- if (parent) {
1436
- const siblings = parent.querySelectorAll(':scope > ' + tag);
1437
- const idx = Array.from(siblings).indexOf(el) + 1;
1438
- selector = tag + ':nth-of-type(' + idx + ')';
1439
- } else {
1440
- selector = tag;
1441
- }
1442
- }
1443
-
1444
- elements.push({
1445
- tag: el.tagName.toLowerCase(),
1446
- text: (el.textContent || '').trim().slice(0, 80),
1447
- type: el.getAttribute('type') || '',
1448
- name: el.getAttribute('name') || '',
1449
- id: el.id || '',
1450
- href: el.getAttribute('href') || '',
1451
- placeholder: el.getAttribute('placeholder') || '',
1452
- selector: selector,
1453
- });
1454
- }
1455
- return JSON.stringify(elements, null, 2);
1456
- })()
1457
- `,
1458
- returnByValue: true
1459
- });
1460
- return result.result?.value || "[]";
1509
+ if (sawInteractive) {
1510
+ await new Promise((r) => setTimeout(r, 300));
1511
+ }
1461
1512
  }
1462
1513
  isConnected() {
1463
1514
  return this.connected && this.ws?.readyState === WebSocket.OPEN;
@@ -1796,12 +1847,14 @@ async function ensureBrowserAvailable(port = 9222) {
1796
1847
  detail: "Could not start browser with remote debugging. Possible causes:\n 1) Another assistme debug browser is already using port " + port + "\n 2) The browser crashed on startup\nTry: rm -rf ~/.assistme/browser-profile && assistme"
1797
1848
  };
1798
1849
  }
1799
- var browserInstance = null;
1850
+ var browserInstances = /* @__PURE__ */ new Map();
1800
1851
  function getBrowser(port = 9222) {
1801
- if (!browserInstance) {
1802
- browserInstance = new BrowserController(port);
1852
+ let instance = browserInstances.get(port);
1853
+ if (!instance) {
1854
+ instance = new BrowserController(port);
1855
+ browserInstances.set(port, instance);
1803
1856
  }
1804
- return browserInstance;
1857
+ return instance;
1805
1858
  }
1806
1859
 
1807
1860
  // src/commands/browser.ts
@@ -3508,9 +3561,6 @@ async function executeTool(name, input) {
3508
3561
  case "browser_scroll":
3509
3562
  await ensureConnected(browser);
3510
3563
  return input.direction === "up" ? browser.scrollUp() : browser.scrollDown();
3511
- case "browser_get_elements":
3512
- await ensureConnected(browser);
3513
- return browser.getInteractiveElements();
3514
3564
  case "browser_select":
3515
3565
  await ensureConnected(browser);
3516
3566
  return browser.selectOption(input.selector, input.option);
@@ -3691,7 +3741,6 @@ var BROWSER_TOOL_NAMES = [
3691
3741
  "browser_type",
3692
3742
  "browser_press_key",
3693
3743
  "browser_scroll",
3694
- "browser_get_elements",
3695
3744
  "browser_select",
3696
3745
  "browser_snapshot",
3697
3746
  "browser_act",
@@ -3734,13 +3783,7 @@ function createBrowserMcpServer() {
3734
3783
  const base64 = await executeTool("browser_screenshot", {});
3735
3784
  if (base64.length > 100) {
3736
3785
  return {
3737
- content: [
3738
- {
3739
- type: "image",
3740
- data: base64,
3741
- mimeType: "image/png"
3742
- }
3743
- ]
3786
+ content: [{ type: "image", data: base64, mimeType: "image/png" }]
3744
3787
  };
3745
3788
  }
3746
3789
  return { content: [{ type: "text", text: base64 }] };
@@ -3773,12 +3816,6 @@ function createBrowserMcpServer() {
3773
3816
  { direction: z.string().describe("'down' or 'up'") },
3774
3817
  async (args) => callTool("browser_scroll", args)
3775
3818
  ),
3776
- tool(
3777
- "browser_get_elements",
3778
- "Find all interactive elements (links, buttons, inputs) on the current page.",
3779
- {},
3780
- async () => callTool("browser_get_elements", {})
3781
- ),
3782
3819
  tool(
3783
3820
  "browser_select",
3784
3821
  "Select an option from a dropdown menu. Handles both native <select> elements and custom dropdowns (Material Design, React, Angular). Use this instead of manually clicking dropdown items.",
@@ -3807,11 +3844,7 @@ function createBrowserMcpServer() {
3807
3844
  const imageData = parts[1] || "";
3808
3845
  const content = [];
3809
3846
  if (imageData.length > 100) {
3810
- content.push({
3811
- type: "image",
3812
- data: imageData,
3813
- mimeType: "image/png"
3814
- });
3847
+ content.push({ type: "image", data: imageData, mimeType: "image/png" });
3815
3848
  }
3816
3849
  content.push({ type: "text", text: refTable });
3817
3850
  return { content };
@@ -3847,11 +3880,7 @@ function createBrowserMcpServer() {
3847
3880
  const content = [];
3848
3881
  content.push({ type: "text", text: actionText });
3849
3882
  if (screenshotData.length > 100) {
3850
- content.push({
3851
- type: "image",
3852
- data: screenshotData,
3853
- mimeType: "image/png"
3854
- });
3883
+ content.push({ type: "image", data: screenshotData, mimeType: "image/png" });
3855
3884
  }
3856
3885
  return { content };
3857
3886
  }
@@ -5125,7 +5154,7 @@ Available capabilities:
5125
5154
  - Refs persist across actions unless the page navigates. Re-snapshot after navigation or major DOM changes.
5126
5155
 
5127
5156
  **Legacy tools (still available, use when refs don't work):**
5128
- - browser_click, browser_type, browser_select, browser_get_elements, browser_screenshot, browser_evaluate
5157
+ - browser_click, browser_type, browser_select, browser_screenshot, browser_evaluate
5129
5158
  - browser_click supports :contains('text') pseudo-selectors
5130
5159
  - browser_select handles native and custom dropdowns
5131
5160
 
@@ -5441,7 +5470,9 @@ var TaskProcessor = class {
5441
5470
  } finally {
5442
5471
  clearTimeout(timeoutId);
5443
5472
  }
5444
- await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
5473
+ const MAX_CONTENT_LENGTH = 5e4;
5474
+ const truncatedResponse = finalResponse.length > MAX_CONTENT_LENGTH ? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]" : finalResponse;
5475
+ await withRetry(() => completeTask(task.id, truncatedResponse, tokenUsage), {
5445
5476
  maxRetries: 2,
5446
5477
  baseDelayMs: 300,
5447
5478
  label: "completeTask"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.3.2",
3
+ "version": "0.3.3",
4
4
  "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -305,8 +305,15 @@ export class TaskProcessor {
305
305
  clearTimeout(timeoutId);
306
306
  }
307
307
 
308
+ // Truncate finalResponse to avoid edge function payload limits
309
+ const MAX_CONTENT_LENGTH = 50_000;
310
+ const truncatedResponse =
311
+ finalResponse.length > MAX_CONTENT_LENGTH
312
+ ? finalResponse.slice(0, MAX_CONTENT_LENGTH) + "\n\n[Response truncated]"
313
+ : finalResponse;
314
+
308
315
  // Complete the task (with retry for transient DB failures)
309
- await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
316
+ await withRetry(() => completeTask(task.id, truncatedResponse, tokenUsage), {
310
317
  maxRetries: 2,
311
318
  baseDelayMs: 300,
312
319
  label: "completeTask",
@@ -28,7 +28,7 @@ Available capabilities:
28
28
  - Refs persist across actions unless the page navigates. Re-snapshot after navigation or major DOM changes.
29
29
 
30
30
  **Legacy tools (still available, use when refs don't work):**
31
- - browser_click, browser_type, browser_select, browser_get_elements, browser_screenshot, browser_evaluate
31
+ - browser_click, browser_type, browser_select, browser_screenshot, browser_evaluate
32
32
  - browser_click supports :contains('text') pseudo-selectors
33
33
  - browser_select handles native and custom dropdowns
34
34
 
@@ -545,11 +545,13 @@ export async function ensureBrowserAvailable(port = 9222): Promise<AutoLaunchRes
545
545
 
546
546
  // ── Singleton ───────────────────────────────────────────────────────
547
547
 
548
- let browserInstance: BrowserController | null = null;
548
+ const browserInstances = new Map<number, BrowserController>();
549
549
 
550
550
  export function getBrowser(port = 9222): BrowserController {
551
- if (!browserInstance) {
552
- browserInstance = new BrowserController(port);
551
+ let instance = browserInstances.get(port);
552
+ if (!instance) {
553
+ instance = new BrowserController(port);
554
+ browserInstances.set(port, instance);
553
555
  }
554
- return browserInstance;
556
+ return instance;
555
557
  }
@@ -10,6 +10,7 @@ import type {
10
10
  SnapshotResult,
11
11
  ActionSpec,
12
12
  ActionResult,
13
+ RefActionResult,
13
14
  } from "./types.js";
14
15
 
15
16
  export class BrowserController {
@@ -198,11 +199,26 @@ export class BrowserController {
198
199
 
199
200
  async goBack(): Promise<string> {
200
201
  this.ensureConnected();
201
- await this.send("Page.navigateToHistoryEntry", {
202
- entryId: -1,
203
- }).catch(() => {});
204
- // Fallback: use JS
205
- await this.evaluate("window.history.back()");
202
+ try {
203
+ // Get navigation history and go to the previous entry
204
+ const history = (await this.send("Page.getNavigationHistory")) as {
205
+ currentIndex?: number;
206
+ entries?: Array<{ id: number }>;
207
+ };
208
+ const idx = history.currentIndex ?? 0;
209
+ const entries = history.entries ?? [];
210
+ if (idx > 0 && entries[idx - 1]) {
211
+ await this.send("Page.navigateToHistoryEntry", {
212
+ entryId: entries[idx - 1].id,
213
+ });
214
+ } else {
215
+ // No previous entry in CDP history — use JS fallback
216
+ await this.evaluate("window.history.back()");
217
+ }
218
+ } catch {
219
+ // CDP history API failed — use JS fallback
220
+ await this.evaluate("window.history.back()");
221
+ }
206
222
  await this.waitForLoad();
207
223
  const info = await this.getPageInfo();
208
224
  return `Went back to: ${info.title}`;
@@ -394,31 +410,88 @@ export class BrowserController {
394
410
  Tab: { keyCode: 9, code: "Tab" },
395
411
  Escape: { keyCode: 27, code: "Escape" },
396
412
  Backspace: { keyCode: 8, code: "Backspace" },
413
+ Delete: { keyCode: 46, code: "Delete" },
397
414
  ArrowDown: { keyCode: 40, code: "ArrowDown" },
398
415
  ArrowUp: { keyCode: 38, code: "ArrowUp" },
416
+ ArrowLeft: { keyCode: 37, code: "ArrowLeft" },
417
+ ArrowRight: { keyCode: 39, code: "ArrowRight" },
418
+ Home: { keyCode: 36, code: "Home" },
419
+ End: { keyCode: 35, code: "End" },
420
+ Space: { keyCode: 32, code: "Space" },
399
421
  };
400
422
 
401
- const mapped = keyMap[key];
423
+ // CDP modifier bitmask values
424
+ const modifierMap: Record<string, number> = {
425
+ Alt: 1,
426
+ Control: 2,
427
+ Meta: 4,
428
+ Shift: 8,
429
+ };
430
+
431
+ // Parse modifier combos like "Control+a", "Meta+Shift+z"
432
+ const parts = key.split("+");
433
+ let modifiers = 0;
434
+ let actualKey = parts[parts.length - 1];
435
+ for (let i = 0; i < parts.length - 1; i++) {
436
+ const mod = modifierMap[parts[i]];
437
+ if (mod) modifiers |= mod;
438
+ }
439
+
440
+ const mapped = keyMap[actualKey];
402
441
  if (mapped) {
403
442
  await this.send("Input.dispatchKeyEvent", {
404
443
  type: "keyDown",
405
- key,
444
+ key: actualKey,
406
445
  code: mapped.code,
407
446
  windowsVirtualKeyCode: mapped.keyCode,
408
447
  nativeVirtualKeyCode: mapped.keyCode,
448
+ modifiers,
409
449
  });
410
450
  await this.send("Input.dispatchKeyEvent", {
411
451
  type: "keyUp",
412
- key,
452
+ key: actualKey,
413
453
  code: mapped.code,
414
454
  windowsVirtualKeyCode: mapped.keyCode,
415
455
  nativeVirtualKeyCode: mapped.keyCode,
456
+ modifiers,
457
+ });
458
+ } else if (actualKey.length === 1) {
459
+ // Single character key (e.g., "a", "z")
460
+ const code = `Key${actualKey.toUpperCase()}`;
461
+ const keyCode = actualKey.toUpperCase().charCodeAt(0);
462
+ await this.send("Input.dispatchKeyEvent", {
463
+ type: "keyDown",
464
+ key: actualKey,
465
+ code,
466
+ windowsVirtualKeyCode: keyCode,
467
+ nativeVirtualKeyCode: keyCode,
468
+ modifiers,
469
+ });
470
+ if (!modifiers) {
471
+ // Only insert text for unmodified single characters
472
+ await this.send("Input.dispatchKeyEvent", {
473
+ type: "char",
474
+ text: actualKey,
475
+ modifiers,
476
+ });
477
+ }
478
+ await this.send("Input.dispatchKeyEvent", {
479
+ type: "keyUp",
480
+ key: actualKey,
481
+ code,
482
+ modifiers,
416
483
  });
417
484
  } else {
418
- // Single character key
485
+ // Unknown key name — try as-is
419
486
  await this.send("Input.dispatchKeyEvent", {
420
- type: "char",
421
- text: key,
487
+ type: "keyDown",
488
+ key: actualKey,
489
+ modifiers,
490
+ });
491
+ await this.send("Input.dispatchKeyEvent", {
492
+ type: "keyUp",
493
+ key: actualKey,
494
+ modifiers,
422
495
  });
423
496
  }
424
497
 
@@ -816,8 +889,10 @@ export class BrowserController {
816
889
  * element is not yet actionable (e.g., covered by a loading overlay, still
817
890
  * animating into view). This matches Playwright's auto-waiting behavior.
818
891
  */
819
- async clickRef(refId: number): Promise<string> {
892
+ async clickRef(refId: number): Promise<RefActionResult> {
820
893
  this.ensureConnected();
894
+ const ref = this.refCache.get(refId);
895
+ const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
821
896
 
822
897
  // Auto-wait: retry up to 3 times if element is not actionable yet
823
898
  const maxRetries = 3;
@@ -827,7 +902,10 @@ export class BrowserController {
827
902
  const resolved = await this.resolveRef(refId);
828
903
 
829
904
  if (!resolved) {
830
- return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
905
+ return {
906
+ success: false,
907
+ message: `Ref ${refLabel} not found. Take a new snapshot with browser_snapshot.`,
908
+ };
831
909
  }
832
910
 
833
911
  if (resolved.error) {
@@ -837,9 +915,7 @@ export class BrowserController {
837
915
  await new Promise((r) => setTimeout(r, 500));
838
916
  continue;
839
917
  }
840
- // Final attempt failed report the actionability issue
841
- const ref = this.refCache.get(refId);
842
- return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
918
+ return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
843
919
  }
844
920
 
845
921
  // Element is actionable — small delay after scroll for rendering
@@ -875,13 +951,10 @@ export class BrowserController {
875
951
  });
876
952
 
877
953
  await new Promise((r) => setTimeout(r, 300));
878
- const ref = this.refCache.get(refId);
879
- return `Clicked [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
954
+ return { success: true, message: `Clicked ${refLabel}` };
880
955
  }
881
956
 
882
- // Should not reach here, but just in case
883
- const ref = this.refCache.get(refId);
884
- return `Cannot click [${refId}] ${ref?.role || ""} "${ref?.name || ""}": ${lastError}`;
957
+ return { success: false, message: `Cannot click ${refLabel}: ${lastError}` };
885
958
  }
886
959
 
887
960
  /**
@@ -889,48 +962,61 @@ export class BrowserController {
889
962
  * Clicks to focus, selects all existing text (Ctrl/Cmd+A), then uses
890
963
  * Input.insertText for reliable text insertion across all frameworks.
891
964
  */
892
- async typeRef(refId: number, text: string): Promise<string> {
965
+ async typeRef(refId: number, text: string): Promise<RefActionResult> {
893
966
  this.ensureConnected();
967
+ const ref = this.refCache.get(refId);
968
+ const refLabel = `[${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
894
969
 
895
970
  // Click to focus the element
896
971
  const clickResult = await this.clickRef(refId);
897
- if (clickResult.includes("not found")) return clickResult;
972
+ if (!clickResult.success) return clickResult;
898
973
  await new Promise((r) => setTimeout(r, 100));
899
974
 
900
- // Select all existing text (Cmd+A on macOS, Ctrl+A elsewhere)
901
- const modifier = platform() === "darwin" ? 4 : 2;
902
- await this.send("Input.dispatchKeyEvent", {
903
- type: "keyDown",
904
- modifiers: modifier,
905
- key: "a",
906
- code: "KeyA",
907
- windowsVirtualKeyCode: 65,
908
- });
909
- await this.send("Input.dispatchKeyEvent", {
910
- type: "keyUp",
911
- key: "a",
912
- code: "KeyA",
913
- });
975
+ // Clear existing text using multiple strategies for reliability:
976
+ // 1. Try Ctrl/Cmd+A to select all, then Backspace to delete
977
+ const selectAllKey = platform() === "darwin" ? "Meta+a" : "Control+a";
978
+ await this.pressKey(selectAllKey);
979
+ await new Promise((r) => setTimeout(r, 50));
980
+ await this.pressKey("Backspace");
981
+ await new Promise((r) => setTimeout(r, 50));
914
982
 
915
- // Delete selected text
916
- await this.send("Input.dispatchKeyEvent", {
917
- type: "keyDown",
918
- key: "Backspace",
919
- code: "Backspace",
920
- windowsVirtualKeyCode: 8,
921
- });
922
- await this.send("Input.dispatchKeyEvent", {
923
- type: "keyUp",
924
- key: "Backspace",
925
- code: "Backspace",
983
+ // 2. Verify the field is empty; if not, fall back to JS-based clearing
984
+ const cleared = await this.send("Runtime.evaluate", {
985
+ expression: `
986
+ (function() {
987
+ var el = document.querySelector('[data-assistme-ref="${refId}"]');
988
+ if (!el) return 'no_element';
989
+ if (el.value !== undefined && el.value !== '') {
990
+ // Ctrl+A didn't work (some frameworks intercept it) — clear via JS
991
+ var setter = Object.getOwnPropertyDescriptor(
992
+ window.HTMLInputElement.prototype, 'value'
993
+ )?.set || Object.getOwnPropertyDescriptor(
994
+ window.HTMLTextAreaElement.prototype, 'value'
995
+ )?.set;
996
+ if (setter) setter.call(el, '');
997
+ else el.value = '';
998
+ el.dispatchEvent(new Event('input', { bubbles: true }));
999
+ el.dispatchEvent(new Event('change', { bubbles: true }));
1000
+ return 'js_cleared';
1001
+ }
1002
+ return 'ok';
1003
+ })()
1004
+ `,
1005
+ returnByValue: true,
926
1006
  });
1007
+ const clearStatus = ((cleared as CDPEvalResult).result?.value as string) || "ok";
1008
+ if (clearStatus === "no_element") {
1009
+ return {
1010
+ success: false,
1011
+ message: `Ref ${refLabel} not found after click. Take a new snapshot.`,
1012
+ };
1013
+ }
927
1014
 
928
1015
  // Insert text via CDP (goes through the browser's input pipeline)
929
1016
  await this.send("Input.insertText", { text });
930
1017
 
931
1018
  await new Promise((r) => setTimeout(r, 100));
932
- const ref = this.refCache.get(refId);
933
- return `Typed "${text}" into [${refId}] ${ref?.role || ""} "${ref?.name || ""}"`;
1019
+ return { success: true, message: `Typed "${text}" into ${refLabel}` };
934
1020
  }
935
1021
 
936
1022
  /**
@@ -938,21 +1024,22 @@ export class BrowserController {
938
1024
  * ref's data attribute as selector, handling both native <select> and
939
1025
  * custom dropdown components.
940
1026
  */
941
- async selectRef(refId: number, option: string): Promise<string> {
1027
+ async selectRef(refId: number, option: string): Promise<RefActionResult> {
942
1028
  this.ensureConnected();
943
1029
 
944
- // Check if ref exists
945
1030
  const cached = this.refCache.get(refId);
946
1031
  if (!cached) {
947
- return `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`;
1032
+ return {
1033
+ success: false,
1034
+ message: `Ref [${refId}] not found. Take a new snapshot with browser_snapshot.`,
1035
+ };
948
1036
  }
949
1037
 
950
- // Use the data attribute selector to find the element
1038
+ const refLabel = `[${refId}] ${cached.role} "${cached.name}"`;
951
1039
  const result = await this.selectOption(`[data-assistme-ref="${refId}"]`, option);
952
- return result.replace(
953
- /\[data-assistme-ref="\d+"\]/,
954
- `[${refId}] ${cached.role} "${cached.name}"`
955
- );
1040
+ const message = result.replace(/\[data-assistme-ref="\d+"\]/, refLabel);
1041
+ const success = !result.includes("not found");
1042
+ return { success, message };
956
1043
  }
957
1044
 
958
1045
  // ── Action Pipeline ───────────────────────────────────────────────
@@ -977,18 +1064,24 @@ export class BrowserController {
977
1064
 
978
1065
  try {
979
1066
  switch (spec.action) {
980
- case "click":
981
- result = await this.clickRef(spec.ref);
982
- success = !result.includes("not found");
1067
+ case "click": {
1068
+ const r = await this.clickRef(spec.ref);
1069
+ result = r.message;
1070
+ success = r.success;
983
1071
  break;
984
- case "type":
985
- result = await this.typeRef(spec.ref, spec.text);
986
- success = !result.includes("not found");
1072
+ }
1073
+ case "type": {
1074
+ const r = await this.typeRef(spec.ref, spec.text);
1075
+ result = r.message;
1076
+ success = r.success;
987
1077
  break;
988
- case "select":
989
- result = await this.selectRef(spec.ref, spec.option);
990
- success = !result.includes("not found");
1078
+ }
1079
+ case "select": {
1080
+ const r = await this.selectRef(spec.ref, spec.option);
1081
+ result = r.message;
1082
+ success = r.success;
991
1083
  break;
1084
+ }
992
1085
  case "press":
993
1086
  result = await this.pressKey(spec.key);
994
1087
  break;
@@ -1074,15 +1167,24 @@ export class BrowserController {
1074
1167
  // Strategy 2: Custom dropdown — find the trigger element
1075
1168
  var trigger = selectEl;
1076
1169
  if (!trigger) {
1077
- // Try finding by label/placeholder text
1078
- var allEls = document.querySelectorAll('*');
1079
- for (var j = 0; j < allEls.length; j++) {
1080
- var el = allEls[j];
1170
+ // Try finding by aria-label first (fast, indexed)
1171
+ trigger = document.querySelector('[aria-label="' + sel.replace(/"/g, '\\"') + '"]');
1172
+ }
1173
+ if (!trigger) {
1174
+ // Try finding by label/placeholder text in likely dropdown elements
1175
+ var dropdownCandidates = document.querySelectorAll(
1176
+ 'button, [role="combobox"], [role="listbox"], [role="button"], ' +
1177
+ 'select, input, .MuiSelect-root, .MuiInput-root, ' +
1178
+ '[class*="select"], [class*="dropdown"], [class*="picker"]'
1179
+ );
1180
+ for (var j = 0; j < dropdownCandidates.length; j++) {
1181
+ var el = dropdownCandidates[j];
1081
1182
  var ownText = Array.from(el.childNodes)
1082
1183
  .filter(function(n) { return n.nodeType === 3; })
1083
1184
  .map(function(n) { return n.textContent.trim(); })
1084
1185
  .join('');
1085
- if (ownText === sel || el.getAttribute('aria-label') === sel) {
1186
+ if (ownText === sel || el.getAttribute('aria-label') === sel ||
1187
+ el.getAttribute('placeholder') === sel) {
1086
1188
  trigger = el;
1087
1189
  break;
1088
1190
  }
@@ -1119,10 +1221,13 @@ export class BrowserController {
1119
1221
  }
1120
1222
  }
1121
1223
 
1122
- // Broader search: any visible element with exact text match
1123
- var everything = document.querySelectorAll('*');
1124
- for (var m = 0; m < everything.length; m++) {
1125
- var candidate = everything[m];
1224
+ // Broader search: visible leaf elements in interactive containers
1225
+ var broadCandidates = document.querySelectorAll(
1226
+ 'li, span, div, a, button, label, [role="option"], [role="menuitem"], ' +
1227
+ '[role="menuitemradio"], [role="menuitemcheckbox"], [data-value]'
1228
+ );
1229
+ for (var m = 0; m < broadCandidates.length; m++) {
1230
+ var candidate = broadCandidates[m];
1126
1231
  if (candidate.textContent && candidate.textContent.trim() === optText &&
1127
1232
  candidate.offsetParent !== null && candidate.children.length === 0) {
1128
1233
  candidate.click();
@@ -1217,6 +1322,7 @@ export class BrowserController {
1217
1322
 
1218
1323
  private async waitForLoad(timeoutMs = 8000): Promise<void> {
1219
1324
  const start = Date.now();
1325
+ let sawInteractive = false;
1220
1326
  while (Date.now() - start < timeoutMs) {
1221
1327
  try {
1222
1328
  const result = await this.send("Runtime.evaluate", {
@@ -1224,71 +1330,27 @@ export class BrowserController {
1224
1330
  returnByValue: true,
1225
1331
  });
1226
1332
  const state = (result as CDPEvalResult).result?.value;
1227
- if (state === "complete" || state === "interactive") {
1228
- // Extra small wait for dynamic content
1229
- await new Promise((r) => setTimeout(r, 500));
1333
+ if (state === "complete") {
1334
+ // Fully loaded — brief wait for dynamic content
1335
+ await new Promise((r) => setTimeout(r, 300));
1230
1336
  return;
1231
1337
  }
1338
+ if (state === "interactive") {
1339
+ if (!sawInteractive) {
1340
+ sawInteractive = true;
1341
+ // DOM is ready but sub-resources still loading — give it more
1342
+ // time to reach "complete" before settling for "interactive"
1343
+ }
1344
+ }
1232
1345
  } catch {
1233
1346
  // Tab might be navigating
1234
1347
  }
1235
1348
  await new Promise((r) => setTimeout(r, 300));
1236
1349
  }
1237
- }
1238
-
1239
- /**
1240
- * Find interactive elements on the page for the AI to understand what's clickable
1241
- */
1242
- async getInteractiveElements(): Promise<string> {
1243
- this.ensureConnected();
1244
- const result = await this.send("Runtime.evaluate", {
1245
- expression: `
1246
- (function() {
1247
- const elements = [];
1248
- const selectors = 'a, button, input, select, textarea, [role="button"], [onclick]';
1249
- const all = document.querySelectorAll(selectors);
1250
- for (let i = 0; i < all.length && elements.length < 50; i++) {
1251
- const el = all[i];
1252
- const rect = el.getBoundingClientRect();
1253
- if (rect.width === 0 || rect.height === 0) continue; // Skip hidden
1254
-
1255
- // Build a reliable CSS selector
1256
- let selector;
1257
- if (el.id) {
1258
- selector = '#' + CSS.escape(el.id);
1259
- } else if (el.getAttribute('data-testid')) {
1260
- selector = '[data-testid="' + el.getAttribute('data-testid') + '"]';
1261
- } else {
1262
- // Build a path-based selector: find nth-of-type among siblings
1263
- const tag = el.tagName.toLowerCase();
1264
- const parent = el.parentElement;
1265
- if (parent) {
1266
- const siblings = parent.querySelectorAll(':scope > ' + tag);
1267
- const idx = Array.from(siblings).indexOf(el) + 1;
1268
- selector = tag + ':nth-of-type(' + idx + ')';
1269
- } else {
1270
- selector = tag;
1271
- }
1272
- }
1273
-
1274
- elements.push({
1275
- tag: el.tagName.toLowerCase(),
1276
- text: (el.textContent || '').trim().slice(0, 80),
1277
- type: el.getAttribute('type') || '',
1278
- name: el.getAttribute('name') || '',
1279
- id: el.id || '',
1280
- href: el.getAttribute('href') || '',
1281
- placeholder: el.getAttribute('placeholder') || '',
1282
- selector: selector,
1283
- });
1284
- }
1285
- return JSON.stringify(elements, null, 2);
1286
- })()
1287
- `,
1288
- returnByValue: true,
1289
- });
1290
-
1291
- return ((result as CDPEvalResult).result?.value as string) || "[]";
1350
+ // Timed out — if we at least saw "interactive", that's usually good enough
1351
+ if (sawInteractive) {
1352
+ await new Promise((r) => setTimeout(r, 300));
1353
+ }
1292
1354
  }
1293
1355
 
1294
1356
  isConnected(): boolean {
@@ -62,6 +62,12 @@ export interface ActionResult {
62
62
  success: boolean;
63
63
  }
64
64
 
65
+ /** Structured result from ref-based interactions (click, type, select). */
66
+ export interface RefActionResult {
67
+ success: boolean;
68
+ message: string;
69
+ }
70
+
65
71
  export interface AutoLaunchResult {
66
72
  success: boolean;
67
73
  action: "already_available" | "launched" | "chrome_not_found" | "launch_failed" | "port_conflict";
@@ -9,11 +9,15 @@ import { getLimiterForTool } from "../utils/rate-limiter.js";
9
9
 
10
10
  // ── Helper ──────────────────────────────────────────────────────────
11
11
 
12
+ /** MCP content block — text or image. */
13
+ type ContentBlock =
14
+ | { type: "text"; text: string }
15
+ | { type: "image"; data: string; mimeType: string };
16
+
17
+ type ToolResult = { content: ContentBlock[] };
18
+
12
19
  /** Wrap executeTool with rate limiting and text result. */
13
- async function callTool(
14
- name: string,
15
- input: Record<string, unknown>
16
- ): Promise<{ content: Array<{ type: "text"; text: string }> }> {
20
+ async function callTool(name: string, input: Record<string, unknown>): Promise<ToolResult> {
17
21
  const limiter = getLimiterForTool(name);
18
22
  if (limiter) await limiter.acquire();
19
23
  const result = await executeTool(name, input);
@@ -31,7 +35,6 @@ export const BROWSER_TOOL_NAMES = [
31
35
  "browser_type",
32
36
  "browser_press_key",
33
37
  "browser_scroll",
34
- "browser_get_elements",
35
38
  "browser_select",
36
39
  "browser_snapshot",
37
40
  "browser_act",
@@ -69,19 +72,13 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
69
72
  "browser_screenshot",
70
73
  "Take a screenshot of the current browser page. Returns a base64-encoded PNG image.",
71
74
  {},
72
- async () => {
75
+ async (): Promise<ToolResult> => {
73
76
  const limiter = getLimiterForTool("browser_screenshot");
74
77
  if (limiter) await limiter.acquire();
75
78
  const base64 = await executeTool("browser_screenshot", {});
76
79
  if (base64.length > 100) {
77
80
  return {
78
- content: [
79
- {
80
- type: "image" as const,
81
- data: base64,
82
- mimeType: "image/png",
83
- } as unknown as { type: "text"; text: string },
84
- ],
81
+ content: [{ type: "image", data: base64, mimeType: "image/png" }],
85
82
  };
86
83
  }
87
84
  return { content: [{ type: "text", text: base64 }] };
@@ -114,12 +111,6 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
114
111
  { direction: z.string().describe("'down' or 'up'") },
115
112
  async (args) => callTool("browser_scroll", args)
116
113
  ),
117
- tool(
118
- "browser_get_elements",
119
- "Find all interactive elements (links, buttons, inputs) on the current page.",
120
- {},
121
- async () => callTool("browser_get_elements", {})
122
- ),
123
114
  tool(
124
115
  "browser_select",
125
116
  "Select an option from a dropdown menu. Handles both native <select> elements and custom dropdowns (Material Design, React, Angular). Use this instead of manually clicking dropdown items.",
@@ -149,7 +140,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
149
140
  "Overlay ref badges on the screenshot. Default false. Use true for simple pages where visual context helps."
150
141
  ),
151
142
  },
152
- async (args) => {
143
+ async (args): Promise<ToolResult> => {
153
144
  const limiter = getLimiterForTool("browser_snapshot");
154
145
  if (limiter) await limiter.acquire();
155
146
  const result = await executeTool("browser_snapshot", args);
@@ -159,13 +150,9 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
159
150
  const refTable = parts[0];
160
151
  const imageData = parts[1] || "";
161
152
 
162
- const content: Array<{ type: "text"; text: string }> = [];
153
+ const content: ContentBlock[] = [];
163
154
  if (imageData.length > 100) {
164
- content.push({
165
- type: "image" as const,
166
- data: imageData,
167
- mimeType: "image/png",
168
- } as unknown as { type: "text"; text: string });
155
+ content.push({ type: "image", data: imageData, mimeType: "image/png" });
169
156
  }
170
157
  content.push({ type: "text", text: refTable });
171
158
 
@@ -197,7 +184,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
197
184
  .optional()
198
185
  .describe("Take screenshot after actions (default: false)"),
199
186
  },
200
- async (args) => {
187
+ async (args): Promise<ToolResult> => {
201
188
  const limiter = getLimiterForTool("browser_act");
202
189
  if (limiter) await limiter.acquire();
203
190
  const result = await executeTool("browser_act", {
@@ -210,14 +197,10 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
210
197
  const actionText = parts[0];
211
198
  const screenshotData = parts[1] || "";
212
199
 
213
- const content: Array<{ type: "text"; text: string }> = [];
200
+ const content: ContentBlock[] = [];
214
201
  content.push({ type: "text", text: actionText });
215
202
  if (screenshotData.length > 100) {
216
- content.push({
217
- type: "image" as const,
218
- data: screenshotData,
219
- mimeType: "image/png",
220
- } as unknown as { type: "text"; text: string });
203
+ content.push({ type: "image", data: screenshotData, mimeType: "image/png" });
221
204
  }
222
205
 
223
206
  return { content };
@@ -14,6 +14,7 @@ export type {
14
14
  SnapshotResult,
15
15
  ActionSpec,
16
16
  ActionResult,
17
+ RefActionResult,
17
18
  AutoLaunchResult,
18
19
  } from "../browser/types.js";
19
20
 
@@ -169,9 +169,6 @@ export async function executeTool(name: string, input: Record<string, unknown>):
169
169
  case "browser_scroll":
170
170
  await ensureConnected(browser);
171
171
  return (input.direction as string) === "up" ? browser.scrollUp() : browser.scrollDown();
172
- case "browser_get_elements":
173
- await ensureConnected(browser);
174
- return browser.getInteractiveElements();
175
172
  case "browser_select":
176
173
  await ensureConnected(browser);
177
174
  return browser.selectOption(input.selector as string, input.option as string);