browser-use 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +24 -18
  2. package/dist/actor/element.js +24 -3
  3. package/dist/actor/mouse.js +21 -3
  4. package/dist/actor/page.js +33 -11
  5. package/dist/agent/gif.js +28 -3
  6. package/dist/agent/message-manager/service.js +2 -22
  7. package/dist/agent/message-manager/utils.js +15 -2
  8. package/dist/agent/message-manager/views.d.ts +7 -7
  9. package/dist/agent/message-manager/views.js +1 -0
  10. package/dist/agent/prompts.d.ts +3 -0
  11. package/dist/agent/prompts.js +22 -12
  12. package/dist/agent/service.d.ts +9 -1
  13. package/dist/agent/service.js +204 -79
  14. package/dist/agent/system_prompt.md +12 -11
  15. package/dist/agent/system_prompt_anthropic_flash.md +6 -5
  16. package/dist/agent/system_prompt_no_thinking.md +12 -11
  17. package/dist/agent/views.d.ts +2 -0
  18. package/dist/agent/views.js +48 -36
  19. package/dist/browser/extensions.js +20 -10
  20. package/dist/browser/profile.d.ts +4 -0
  21. package/dist/browser/profile.js +107 -4
  22. package/dist/browser/session.d.ts +28 -1
  23. package/dist/browser/session.js +1436 -528
  24. package/dist/browser/watchdogs/default-action-watchdog.js +32 -3
  25. package/dist/browser/watchdogs/downloads-watchdog.d.ts +4 -0
  26. package/dist/browser/watchdogs/downloads-watchdog.js +105 -9
  27. package/dist/browser/watchdogs/har-recording-watchdog.d.ts +1 -0
  28. package/dist/browser/watchdogs/har-recording-watchdog.js +54 -2
  29. package/dist/browser/watchdogs/permissions-watchdog.d.ts +5 -0
  30. package/dist/browser/watchdogs/permissions-watchdog.js +106 -3
  31. package/dist/browser/watchdogs/recording-watchdog.d.ts +2 -0
  32. package/dist/browser/watchdogs/recording-watchdog.js +54 -2
  33. package/dist/browser/watchdogs/security-watchdog.d.ts +1 -0
  34. package/dist/browser/watchdogs/security-watchdog.js +47 -7
  35. package/dist/browser/watchdogs/storage-state-watchdog.d.ts +6 -0
  36. package/dist/browser/watchdogs/storage-state-watchdog.js +206 -14
  37. package/dist/cli.d.ts +13 -2
  38. package/dist/cli.js +187 -7
  39. package/dist/code-use/namespace.js +52 -7
  40. package/dist/code-use/notebook-export.js +18 -2
  41. package/dist/code-use/service.js +1 -0
  42. package/dist/config.js +26 -4
  43. package/dist/controller/action-timeout.d.ts +9 -0
  44. package/dist/controller/action-timeout.js +95 -0
  45. package/dist/controller/registry/service.d.ts +1 -0
  46. package/dist/controller/registry/service.js +28 -1
  47. package/dist/controller/service.d.ts +2 -1
  48. package/dist/controller/service.js +494 -329
  49. package/dist/filesystem/file-system.js +38 -8
  50. package/dist/integrations/gmail/service.js +30 -6
  51. package/dist/llm/browser-use/chat.js +2 -2
  52. package/dist/llm/codex/auth.d.ts +118 -0
  53. package/dist/llm/codex/auth.js +599 -0
  54. package/dist/llm/codex/chat.d.ts +70 -0
  55. package/dist/llm/codex/chat.js +392 -0
  56. package/dist/llm/codex/index.d.ts +2 -0
  57. package/dist/llm/codex/index.js +2 -0
  58. package/dist/llm/google/chat.js +18 -1
  59. package/dist/logging-config.js +22 -11
  60. package/dist/mcp/client.d.ts +1 -0
  61. package/dist/mcp/client.js +12 -10
  62. package/dist/mcp/redaction.d.ts +3 -0
  63. package/dist/mcp/redaction.js +132 -0
  64. package/dist/mcp/server.d.ts +2 -0
  65. package/dist/mcp/server.js +64 -22
  66. package/dist/screenshots/service.js +25 -2
  67. package/dist/skill-cli/direct.d.ts +4 -1
  68. package/dist/skill-cli/direct.js +260 -64
  69. package/dist/skill-cli/server.d.ts +1 -0
  70. package/dist/skill-cli/server.js +115 -25
  71. package/dist/skill-cli/tunnel.d.ts +1 -0
  72. package/dist/skill-cli/tunnel.js +16 -4
  73. package/dist/sync/auth.js +22 -9
  74. package/dist/telemetry/service.js +21 -2
  75. package/dist/telemetry/views.js +31 -8
  76. package/dist/tokens/custom-pricing.js +2 -2
  77. package/dist/tokens/openrouter-pricing.d.ts +11 -0
  78. package/dist/tokens/openrouter-pricing.js +102 -0
  79. package/dist/tokens/service.js +20 -16
  80. package/dist/utils.d.ts +3 -1
  81. package/dist/utils.js +3 -1
  82. package/package.json +68 -27
@@ -13,8 +13,18 @@ import { createLogger } from '../logging-config.js';
13
13
  import { sanitize_surrogates } from '../utils.js';
14
14
  import { findUnsupportedJsonSchemaKeyword, normalizeStructuredDataBySchema, } from '../tools/extraction/schema-utils.js';
15
15
  import { getClickDescription } from '../tools/utils.js';
16
+ import { isActionTimeoutError, runActionWithTimeout, } from './action-timeout.js';
16
17
  const DEFAULT_WAIT_OFFSET = 1;
17
18
  const MAX_WAIT_SECONDS = 30;
19
+ const chmodPrivateFile = async (filePath) => {
20
+ if (process.platform !== 'win32') {
21
+ await fsp.chmod(filePath, 0o600);
22
+ }
23
+ };
24
+ const writePrivateBinaryFile = async (filePath, data) => {
25
+ await fsp.writeFile(filePath, data, { mode: 0o600 });
26
+ await chmodPrivateFile(filePath);
27
+ };
18
28
  const toActionEntries = (action) => {
19
29
  if (!action) {
20
30
  return [];
@@ -81,6 +91,20 @@ const dispatchBrowserEventIfAvailable = async (browser_session, event, fallback)
81
91
  }
82
92
  return fallback();
83
93
  };
94
+ const validateBrowserPageAfterAction = async (browser_session, page, signal = null) => {
95
+ if (typeof browser_session?.validate_page_after_action === 'function') {
96
+ await browser_session.validate_page_after_action(page, signal);
97
+ return;
98
+ }
99
+ const assertUrlAllowed = browser_session?._assert_page_url_allowed_or_rollback;
100
+ if (typeof assertUrlAllowed === 'function' && page) {
101
+ await assertUrlAllowed.call(browser_session, page);
102
+ }
103
+ const syncCurrentTab = browser_session?._syncCurrentTabFromPage;
104
+ if (typeof syncCurrentTab === 'function') {
105
+ await syncCurrentTab.call(browser_session, page);
106
+ }
107
+ };
84
108
  const runWithTimeoutAndSignal = async (operation, timeoutMs, signal, timeoutMessage = 'Operation timed out') => {
85
109
  throwIfAborted(signal);
86
110
  if (timeoutMs <= 0) {
@@ -412,11 +436,17 @@ export class Controller {
412
436
  if (!page?.mouse?.click) {
413
437
  throw new BrowserError('Unable to perform coordinate click on the current page.');
414
438
  }
439
+ await validateBrowserPageAfterAction(browser_session, page, signal);
415
440
  const [actualX, actualY] = convertLlmCoordinatesToViewport(params.coordinate_x, params.coordinate_y, browser_session);
416
- await dispatchBrowserEventIfAvailable(browser_session, new ClickCoordinateEvent({
417
- coordinate_x: actualX,
418
- coordinate_y: actualY,
419
- }), () => page.mouse.click(actualX, actualY));
441
+ try {
442
+ await dispatchBrowserEventIfAvailable(browser_session, new ClickCoordinateEvent({
443
+ coordinate_x: actualX,
444
+ coordinate_y: actualY,
445
+ }), () => page.mouse.click(actualX, actualY));
446
+ }
447
+ finally {
448
+ await validateBrowserPageAfterAction(browser_session, page, signal);
449
+ }
420
450
  const coordinateMessage = `🖱️ Clicked at coordinates (${params.coordinate_x}, ${params.coordinate_y})` +
421
451
  (await detectNewTabNote(tabsBefore));
422
452
  return new ActionResult({
@@ -621,20 +651,39 @@ export class Controller {
621
651
  allowedPaths.add(downloadedPath);
622
652
  }
623
653
  if (!allowedPaths.has(uploadPath)) {
624
- const fsInstance = file_system ?? null;
625
- const managedFile = fsInstance && typeof fsInstance.get_file === 'function'
626
- ? fsInstance.get_file(uploadPath)
627
- : null;
628
- if (managedFile && fsInstance?.get_dir) {
629
- uploadPath = path.join(fsInstance.get_dir(), uploadPath);
630
- }
631
- else if (!isLocalBrowser) {
654
+ if (!isLocalBrowser) {
632
655
  // Remote browser paths may only exist on the remote runtime.
633
656
  }
634
657
  else {
635
- return new ActionResult({
636
- error: `File path ${params.path} is not available. To fix: add this file path to available_file_paths when creating the Agent.`,
637
- });
658
+ const fsInstance = file_system ?? null;
659
+ const managedFile = fsInstance && typeof fsInstance.get_file === 'function'
660
+ ? fsInstance.get_file(uploadPath)
661
+ : null;
662
+ if (managedFile && fsInstance?.get_dir) {
663
+ const fsDir = fsInstance.get_dir();
664
+ const managedFileName = String(managedFile.fullName ??
665
+ managedFile.full_name ??
666
+ path.basename(uploadPath));
667
+ const candidatePath = path.join(fsDir, managedFileName);
668
+ const realDir = fs.realpathSync(fsDir);
669
+ const realPath = fs.existsSync(candidatePath)
670
+ ? fs.realpathSync(candidatePath)
671
+ : path.resolve(candidatePath);
672
+ const relativePath = path.relative(realDir, realPath);
673
+ if (relativePath === '..' ||
674
+ relativePath.startsWith(`..${path.sep}`) ||
675
+ path.isAbsolute(relativePath)) {
676
+ return new ActionResult({
677
+ error: `Upload of ${params.path} escapes FileSystem directory; refusing.`,
678
+ });
679
+ }
680
+ uploadPath = candidatePath;
681
+ }
682
+ else {
683
+ return new ActionResult({
684
+ error: `File path ${params.path} is not available. To fix: add this file path to available_file_paths when creating the Agent.`,
685
+ });
686
+ }
638
687
  }
639
688
  }
640
689
  if (isLocalBrowser) {
@@ -666,7 +715,14 @@ export class Controller {
666
715
  try {
667
716
  const page = await browser_session.get_current_page?.();
668
717
  if (page?.evaluate) {
669
- const evaluated = await page.evaluate(() => window.scrollY || window.pageYOffset || 0);
718
+ await validateBrowserPageAfterAction(browser_session, page, signal);
719
+ let evaluated;
720
+ try {
721
+ evaluated = await page.evaluate(() => window.scrollY || window.pageYOffset || 0);
722
+ }
723
+ finally {
724
+ await validateBrowserPageAfterAction(browser_session, page, signal);
725
+ }
670
726
  const numeric = typeof evaluated === 'number' ? evaluated : Number(evaluated);
671
727
  if (Number.isFinite(numeric)) {
672
728
  currentScrollY = numeric;
@@ -877,7 +933,7 @@ export class Controller {
877
933
  const extractStructuredDescription = "LLM extracts structured data from page markdown. Use when: on right page, know what to extract, haven't called before on same page+query. Can't get interactive elements. Set extract_links=True for URLs. Use start_from_char if previous extraction was truncated to extract data further down the page. When paginating across pages, pass already_collected with item identifiers (names/URLs) from prior pages to avoid duplicates.";
878
934
  this.registry.action(extractStructuredDescription, {
879
935
  param_model: ExtractStructuredDataActionSchema,
880
- })(async function extract_structured_data(params, { page, page_extraction_llm, extraction_schema, file_system, signal }) {
936
+ })(async function extract_structured_data(params, { browser_session, page, page_extraction_llm, extraction_schema, file_system, signal, }) {
881
937
  throwIfAborted(signal);
882
938
  if (!page) {
883
939
  throw new BrowserError('No active page available for extraction.');
@@ -886,9 +942,15 @@ export class Controller {
886
942
  throw new BrowserError('page_extraction_llm is not configured.');
887
943
  }
888
944
  const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
945
+ await validateBrowserPageAfterAction(browser_session, page, signal);
889
946
  const pageHtml = await runWithTimeoutAndSignal(async () => {
890
- const value = await page.content?.();
891
- return typeof value === 'string' ? value : '';
947
+ try {
948
+ const value = await page.content?.();
949
+ return typeof value === 'string' ? value : '';
950
+ }
951
+ finally {
952
+ await validateBrowserPageAfterAction(browser_session, page, signal);
953
+ }
892
954
  }, 10000, signal, 'Page content extraction timed out');
893
955
  if (!pageHtml) {
894
956
  throw new BrowserError('Unable to extract page content.');
@@ -1168,72 +1230,79 @@ You will be given a query and the markdown of a webpage that has been filtered t
1168
1230
  if (!page?.evaluate) {
1169
1231
  throw new BrowserError('No active page for search_page.');
1170
1232
  }
1171
- const searchResult = (await page.evaluate(({ pattern, regex, caseSensitive, contextChars, cssScope, maxResults, }) => {
1172
- const sourceNode = cssScope
1173
- ? document.querySelector(cssScope)
1174
- : document.body;
1175
- if (!sourceNode) {
1176
- return {
1177
- error: `CSS scope not found: ${cssScope}`,
1178
- matches: [],
1179
- total: 0,
1180
- };
1181
- }
1182
- const sourceText = sourceNode.innerText ||
1183
- sourceNode.textContent ||
1184
- '';
1185
- if (!sourceText.trim()) {
1186
- return {
1187
- matches: [],
1188
- total: 0,
1189
- };
1190
- }
1191
- const safePattern = regex
1192
- ? pattern
1193
- : pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
1194
- const flags = caseSensitive ? 'g' : 'gi';
1195
- let matcher;
1196
- try {
1197
- matcher = new RegExp(safePattern, flags);
1198
- }
1199
- catch (error) {
1200
- return {
1201
- error: `Invalid regex pattern: ${String(error)}`,
1202
- matches: [],
1203
- total: 0,
1204
- };
1205
- }
1206
- const matches = [];
1207
- let foundTotal = 0;
1208
- let m;
1209
- while ((m = matcher.exec(sourceText)) !== null) {
1210
- foundTotal += 1;
1211
- if (matches.length < Math.max(1, maxResults)) {
1212
- const start = Math.max(0, m.index - Math.max(0, contextChars));
1213
- const end = Math.min(sourceText.length, m.index + m[0].length + Math.max(0, contextChars));
1214
- matches.push({
1215
- position: m.index,
1216
- match: m[0],
1217
- snippet: sourceText.slice(start, end),
1218
- });
1233
+ let searchResult = null;
1234
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1235
+ try {
1236
+ searchResult = (await page.evaluate(({ pattern, regex, caseSensitive, contextChars, cssScope, maxResults, }) => {
1237
+ const sourceNode = cssScope
1238
+ ? document.querySelector(cssScope)
1239
+ : document.body;
1240
+ if (!sourceNode) {
1241
+ return {
1242
+ error: `CSS scope not found: ${cssScope}`,
1243
+ matches: [],
1244
+ total: 0,
1245
+ };
1219
1246
  }
1220
- if (m[0].length === 0) {
1221
- matcher.lastIndex += 1;
1247
+ const sourceText = sourceNode.innerText ||
1248
+ sourceNode.textContent ||
1249
+ '';
1250
+ if (!sourceText.trim()) {
1251
+ return {
1252
+ matches: [],
1253
+ total: 0,
1254
+ };
1222
1255
  }
1223
- }
1224
- return {
1225
- matches,
1226
- total: foundTotal,
1227
- truncated: foundTotal > matches.length,
1228
- };
1229
- }, {
1230
- pattern: params.pattern,
1231
- regex: params.regex,
1232
- caseSensitive: params.case_sensitive,
1233
- contextChars: params.context_chars,
1234
- cssScope: params.css_scope ?? null,
1235
- maxResults: params.max_results,
1236
- }));
1256
+ const safePattern = regex
1257
+ ? pattern
1258
+ : pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
1259
+ const flags = caseSensitive ? 'g' : 'gi';
1260
+ let matcher;
1261
+ try {
1262
+ matcher = new RegExp(safePattern, flags);
1263
+ }
1264
+ catch (error) {
1265
+ return {
1266
+ error: `Invalid regex pattern: ${String(error)}`,
1267
+ matches: [],
1268
+ total: 0,
1269
+ };
1270
+ }
1271
+ const matches = [];
1272
+ let foundTotal = 0;
1273
+ let m;
1274
+ while ((m = matcher.exec(sourceText)) !== null) {
1275
+ foundTotal += 1;
1276
+ if (matches.length < Math.max(1, maxResults)) {
1277
+ const start = Math.max(0, m.index - Math.max(0, contextChars));
1278
+ const end = Math.min(sourceText.length, m.index + m[0].length + Math.max(0, contextChars));
1279
+ matches.push({
1280
+ position: m.index,
1281
+ match: m[0],
1282
+ snippet: sourceText.slice(start, end),
1283
+ });
1284
+ }
1285
+ if (m[0].length === 0) {
1286
+ matcher.lastIndex += 1;
1287
+ }
1288
+ }
1289
+ return {
1290
+ matches,
1291
+ total: foundTotal,
1292
+ truncated: foundTotal > matches.length,
1293
+ };
1294
+ }, {
1295
+ pattern: params.pattern,
1296
+ regex: params.regex,
1297
+ caseSensitive: params.case_sensitive,
1298
+ contextChars: params.context_chars,
1299
+ cssScope: params.css_scope ?? null,
1300
+ maxResults: params.max_results,
1301
+ }));
1302
+ }
1303
+ finally {
1304
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1305
+ }
1237
1306
  if (!searchResult) {
1238
1307
  return new ActionResult({ error: 'search_page returned no result' });
1239
1308
  }
@@ -1276,49 +1345,56 @@ You will be given a query and the markdown of a webpage that has been filtered t
1276
1345
  if (!page?.evaluate) {
1277
1346
  throw new BrowserError('No active page for find_elements.');
1278
1347
  }
1279
- const result = (await page.evaluate(({ selector, attributes, maxResults, includeText, }) => {
1280
- let elements;
1281
- try {
1282
- elements = Array.from(document.querySelectorAll(selector));
1283
- }
1284
- catch (error) {
1285
- return {
1286
- error: `Invalid selector: ${String(error)}`,
1287
- elements: [],
1288
- total: 0,
1289
- };
1290
- }
1291
- const selected = elements.slice(0, Math.max(1, maxResults));
1292
- const payload = selected.map((el, idx) => {
1293
- const attrs = {};
1294
- if (attributes?.length) {
1295
- for (const attr of attributes) {
1296
- const value = el.getAttribute(attr);
1297
- if (value != null) {
1298
- attrs[attr] = value;
1348
+ let result = null;
1349
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1350
+ try {
1351
+ result = (await page.evaluate(({ selector, attributes, maxResults, includeText, }) => {
1352
+ let elements;
1353
+ try {
1354
+ elements = Array.from(document.querySelectorAll(selector));
1355
+ }
1356
+ catch (error) {
1357
+ return {
1358
+ error: `Invalid selector: ${String(error)}`,
1359
+ elements: [],
1360
+ total: 0,
1361
+ };
1362
+ }
1363
+ const selected = elements.slice(0, Math.max(1, maxResults));
1364
+ const payload = selected.map((el, idx) => {
1365
+ const attrs = {};
1366
+ if (attributes?.length) {
1367
+ for (const attr of attributes) {
1368
+ const value = el.getAttribute(attr);
1369
+ if (value != null) {
1370
+ attrs[attr] = value;
1371
+ }
1299
1372
  }
1300
1373
  }
1301
- }
1374
+ return {
1375
+ index: idx + 1,
1376
+ tag: el.tagName.toLowerCase(),
1377
+ text: includeText
1378
+ ? (el.textContent || '').replace(/\s+/g, ' ').trim()
1379
+ : '',
1380
+ attributes: attrs,
1381
+ };
1382
+ });
1302
1383
  return {
1303
- index: idx + 1,
1304
- tag: el.tagName.toLowerCase(),
1305
- text: includeText
1306
- ? (el.textContent || '').replace(/\s+/g, ' ').trim()
1307
- : '',
1308
- attributes: attrs,
1384
+ elements: payload,
1385
+ total: elements.length,
1386
+ truncated: elements.length > selected.length,
1309
1387
  };
1310
- });
1311
- return {
1312
- elements: payload,
1313
- total: elements.length,
1314
- truncated: elements.length > selected.length,
1315
- };
1316
- }, {
1317
- selector: params.selector,
1318
- attributes: params.attributes ?? null,
1319
- maxResults: params.max_results,
1320
- includeText: params.include_text,
1321
- }));
1388
+ }, {
1389
+ selector: params.selector,
1390
+ attributes: params.attributes ?? null,
1391
+ maxResults: params.max_results,
1392
+ includeText: params.include_text,
1393
+ }));
1394
+ }
1395
+ finally {
1396
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1397
+ }
1322
1398
  if (!result) {
1323
1399
  return new ActionResult({ error: 'find_elements returned no result' });
1324
1400
  }
@@ -1367,42 +1443,44 @@ You will be given a query and the markdown of a webpage that has been filtered t
1367
1443
  if (!page || !page.evaluate) {
1368
1444
  throw new BrowserError('Unable to access current page for scrolling.');
1369
1445
  }
1370
- // Helper function to get window height with retries
1371
- const getWindowHeight = async (retries = 3) => {
1372
- for (let i = 0; i < retries; i++) {
1373
- throwIfAborted(signal);
1374
- try {
1375
- const height = await page.evaluate(() => window.innerHeight);
1376
- return height || 0;
1446
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1447
+ try {
1448
+ // Helper function to get window height with retries
1449
+ const getWindowHeight = async (retries = 3) => {
1450
+ for (let i = 0; i < retries; i++) {
1451
+ throwIfAborted(signal);
1452
+ try {
1453
+ const height = await page.evaluate(() => window.innerHeight);
1454
+ return height || 0;
1455
+ }
1456
+ catch (error) {
1457
+ if (i === retries - 1) {
1458
+ throw new Error(`Scroll failed due to an error: ${error}`, {
1459
+ cause: error,
1460
+ });
1461
+ }
1462
+ await waitWithSignal(1000, signal);
1463
+ }
1377
1464
  }
1378
- catch (error) {
1379
- if (i === retries - 1) {
1380
- throw new Error(`Scroll failed due to an error: ${error}`, {
1381
- cause: error,
1465
+ return 0;
1466
+ };
1467
+ const windowHeight = await getWindowHeight();
1468
+ const pagesScrolled = params.pages ?? params.num_pages ?? 1;
1469
+ const scrollAmount = Math.floor(windowHeight * pagesScrolled);
1470
+ const dy = params.down ? scrollAmount : -scrollAmount;
1471
+ const direction = params.down ? 'down' : 'up';
1472
+ let scrollTarget = 'the page';
1473
+ // Element-specific scrolling if index is provided
1474
+ if (params.index !== undefined && params.index !== null) {
1475
+ try {
1476
+ const elementNode = await browser_session.get_dom_element_by_index(params.index, { signal });
1477
+ if (!elementNode) {
1478
+ return new ActionResult({
1479
+ error: `Element index ${params.index} not found in browser state`,
1382
1480
  });
1383
1481
  }
1384
- await waitWithSignal(1000, signal);
1385
- }
1386
- }
1387
- return 0;
1388
- };
1389
- const windowHeight = await getWindowHeight();
1390
- const pagesScrolled = params.pages ?? params.num_pages ?? 1;
1391
- const scrollAmount = Math.floor(windowHeight * pagesScrolled);
1392
- const dy = params.down ? scrollAmount : -scrollAmount;
1393
- const direction = params.down ? 'down' : 'up';
1394
- let scrollTarget = 'the page';
1395
- // Element-specific scrolling if index is provided
1396
- if (params.index !== undefined && params.index !== null) {
1397
- try {
1398
- const elementNode = await browser_session.get_dom_element_by_index(params.index, { signal });
1399
- if (!elementNode) {
1400
- return new ActionResult({
1401
- error: `Element index ${params.index} not found in browser state`,
1402
- });
1403
- }
1404
- // Try direct container scrolling (no events that might close dropdowns)
1405
- const containerScrollJs = `
1482
+ // Try direct container scrolling (no events that might close dropdowns)
1483
+ const containerScrollJs = `
1406
1484
  (params) => {
1407
1485
  const { dy, elementXPath } = params;
1408
1486
 
@@ -1482,70 +1560,74 @@ You will be given a query and the markdown of a webpage that has been filtered t
1482
1560
  }
1483
1561
  }
1484
1562
  `;
1485
- const scrollParams = { dy, elementXPath: elementNode.xpath };
1486
- const result = (await page.evaluate(containerScrollJs, scrollParams));
1487
- if (result.success) {
1488
- if (result.containerType === 'element') {
1489
- let containerInfo = result.containerTag;
1490
- if (result.containerId) {
1491
- containerInfo += `#${result.containerId}`;
1563
+ const scrollParams = { dy, elementXPath: elementNode.xpath };
1564
+ const result = (await page.evaluate(containerScrollJs, scrollParams));
1565
+ if (result.success) {
1566
+ if (result.containerType === 'element') {
1567
+ let containerInfo = result.containerTag;
1568
+ if (result.containerId) {
1569
+ containerInfo += `#${result.containerId}`;
1570
+ }
1571
+ else if (result.containerClass) {
1572
+ containerInfo += `.${result.containerClass.split(' ')[0]}`;
1573
+ }
1574
+ scrollTarget = `element ${params.index}'s scroll container (${containerInfo})`;
1575
+ // Don't do additional page scrolling since we successfully scrolled the container
1492
1576
  }
1493
- else if (result.containerClass) {
1494
- containerInfo += `.${result.containerClass.split(' ')[0]}`;
1577
+ else {
1578
+ scrollTarget = `the page (fallback from element ${params.index})`;
1495
1579
  }
1496
- scrollTarget = `element ${params.index}'s scroll container (${containerInfo})`;
1497
- // Don't do additional page scrolling since we successfully scrolled the container
1498
1580
  }
1499
1581
  else {
1500
- scrollTarget = `the page (fallback from element ${params.index})`;
1582
+ // Container scroll failed, need page-level scrolling
1583
+ scrollLogger.debug(`Container scroll failed for element ${params.index}: ${result.reason || 'Unknown'}`);
1584
+ scrollTarget = `the page (no container found for element ${params.index})`;
1585
+ // This will trigger page-level scrolling below
1501
1586
  }
1502
1587
  }
1503
- else {
1504
- // Container scroll failed, need page-level scrolling
1505
- scrollLogger.debug(`Container scroll failed for element ${params.index}: ${result.reason || 'Unknown'}`);
1506
- scrollTarget = `the page (no container found for element ${params.index})`;
1507
- // This will trigger page-level scrolling below
1588
+ catch (error) {
1589
+ scrollLogger.debug(`Element-specific scrolling failed for index ${params.index}: ${error}`);
1590
+ scrollTarget = `the page (fallback from element ${params.index})`;
1591
+ // Fall through to page-level scrolling
1508
1592
  }
1509
1593
  }
1510
- catch (error) {
1511
- scrollLogger.debug(`Element-specific scrolling failed for index ${params.index}: ${error}`);
1512
- scrollTarget = `the page (fallback from element ${params.index})`;
1513
- // Fall through to page-level scrolling
1594
+ // Page-level scrolling (default or fallback)
1595
+ if (scrollTarget === 'the page' ||
1596
+ scrollTarget.includes('fallback') ||
1597
+ scrollTarget.includes('no container found') ||
1598
+ scrollTarget.includes('mouse wheel failed')) {
1599
+ scrollLogger.debug(`🔄 Performing page-level scrolling. Reason: ${scrollTarget}`);
1600
+ try {
1601
+ await dispatchBrowserEventIfAvailable(browser_session, new ScrollEvent({
1602
+ direction,
1603
+ amount: Math.abs(dy),
1604
+ }), () => browser_session._scrollContainer(dy));
1605
+ }
1606
+ catch (error) {
1607
+ // Hard fallback: always works on root scroller
1608
+ await page.evaluate((y) => window.scrollBy(0, y), dy);
1609
+ scrollLogger.debug('Smart scroll failed; used window.scrollBy fallback', error);
1610
+ }
1514
1611
  }
1515
- }
1516
- // Page-level scrolling (default or fallback)
1517
- if (scrollTarget === 'the page' ||
1518
- scrollTarget.includes('fallback') ||
1519
- scrollTarget.includes('no container found') ||
1520
- scrollTarget.includes('mouse wheel failed')) {
1521
- scrollLogger.debug(`🔄 Performing page-level scrolling. Reason: ${scrollTarget}`);
1522
- try {
1523
- await dispatchBrowserEventIfAvailable(browser_session, new ScrollEvent({
1524
- direction,
1525
- amount: Math.abs(dy),
1526
- }), () => browser_session._scrollContainer(dy));
1612
+ // Create descriptive message
1613
+ let longTermMemory;
1614
+ if (pagesScrolled === 1.0) {
1615
+ longTermMemory = `Scrolled ${direction} ${scrollTarget} by one page`;
1527
1616
  }
1528
- catch (error) {
1529
- // Hard fallback: always works on root scroller
1530
- await page.evaluate((y) => window.scrollBy(0, y), dy);
1531
- scrollLogger.debug('Smart scroll failed; used window.scrollBy fallback', error);
1617
+ else {
1618
+ longTermMemory = `Scrolled ${direction} ${scrollTarget} by ${pagesScrolled} pages`;
1532
1619
  }
1620
+ const msg = `🔍 ${longTermMemory}`;
1621
+ scrollLogger.info(msg);
1622
+ return new ActionResult({
1623
+ extracted_content: msg,
1624
+ include_in_memory: true,
1625
+ long_term_memory: longTermMemory,
1626
+ });
1533
1627
  }
1534
- // Create descriptive message
1535
- let longTermMemory;
1536
- if (pagesScrolled === 1.0) {
1537
- longTermMemory = `Scrolled ${direction} ${scrollTarget} by one page`;
1538
- }
1539
- else {
1540
- longTermMemory = `Scrolled ${direction} ${scrollTarget} by ${pagesScrolled} pages`;
1628
+ finally {
1629
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1541
1630
  }
1542
- const msg = `🔍 ${longTermMemory}`;
1543
- scrollLogger.info(msg);
1544
- return new ActionResult({
1545
- extracted_content: msg,
1546
- include_in_memory: true,
1547
- long_term_memory: longTermMemory,
1548
- });
1549
1631
  };
1550
1632
  // Register scroll action with multiple names for LLM compatibility
1551
1633
  // Different LLMs may use different names: scroll, scroll_page, scroll_down
@@ -1601,20 +1683,27 @@ You will be given a query and the markdown of a webpage that has been filtered t
1601
1683
  if (!page?.evaluate) {
1602
1684
  throw new BrowserError('Unable to access page for scrolling.');
1603
1685
  }
1604
- const success = await page.evaluate(({ text }) => {
1605
- const iterator = document.createNodeIterator(document.body, NodeFilter.SHOW_ELEMENT);
1606
- let node;
1607
- while ((node = iterator.nextNode())) {
1608
- const el = node;
1609
- if (!el || !el.textContent)
1610
- continue;
1611
- if (el.textContent.toLowerCase().includes(text.toLowerCase())) {
1612
- el.scrollIntoView({ behavior: 'smooth', block: 'center' });
1613
- return true;
1686
+ await validateBrowserPageAfterAction(browser_session, page);
1687
+ let success = false;
1688
+ try {
1689
+ success = await page.evaluate(({ text }) => {
1690
+ const iterator = document.createNodeIterator(document.body, NodeFilter.SHOW_ELEMENT);
1691
+ let node;
1692
+ while ((node = iterator.nextNode())) {
1693
+ const el = node;
1694
+ if (!el || !el.textContent)
1695
+ continue;
1696
+ if (el.textContent.toLowerCase().includes(text.toLowerCase())) {
1697
+ el.scrollIntoView({ behavior: 'smooth', block: 'center' });
1698
+ return true;
1699
+ }
1614
1700
  }
1615
- }
1616
- return false;
1617
- }, { text: params.text });
1701
+ return false;
1702
+ }, { text: params.text });
1703
+ }
1704
+ finally {
1705
+ await validateBrowserPageAfterAction(browser_session, page);
1706
+ }
1618
1707
  if (!success) {
1619
1708
  throw new BrowserError(`Text '${params.text}' not found on page`);
1620
1709
  }
@@ -1754,7 +1843,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
1754
1843
  }
1755
1844
  fileName = FileSystem.sanitize_filename(fileName);
1756
1845
  const filePath = path.join(fsInstance.get_dir(), fileName);
1757
- await fsp.writeFile(filePath, Buffer.from(screenshotB64, 'base64'));
1846
+ await writePrivateBinaryFile(filePath, Buffer.from(screenshotB64, 'base64'));
1758
1847
  const msg = `📸 Saved screenshot to ${filePath}`;
1759
1848
  return new ActionResult({
1760
1849
  extracted_content: msg,
@@ -1784,20 +1873,28 @@ You will be given a query and the markdown of a webpage that has been filtered t
1784
1873
  if (!page) {
1785
1874
  throw new BrowserError('No active page available for save_as_pdf.');
1786
1875
  }
1876
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1787
1877
  const paperKey = String(params.paper_format ?? 'Letter').toLowerCase();
1788
1878
  const paperSize = paperSizes[paperKey] ?? paperSizes.letter;
1789
1879
  const cdpSession = await browser_session.get_or_create_cdp_session?.(page);
1790
1880
  if (!cdpSession?.send) {
1791
1881
  throw new BrowserError('CDP session unavailable for save_as_pdf.');
1792
1882
  }
1793
- const result = await cdpSession.send('Page.printToPDF', {
1794
- printBackground: params.print_background,
1795
- landscape: params.landscape,
1796
- scale: params.scale,
1797
- paperWidth: paperSize.width,
1798
- paperHeight: paperSize.height,
1799
- preferCSSPageSize: true,
1800
- });
1883
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1884
+ let result = null;
1885
+ try {
1886
+ result = await cdpSession.send('Page.printToPDF', {
1887
+ printBackground: params.print_background,
1888
+ landscape: params.landscape,
1889
+ scale: params.scale,
1890
+ paperWidth: paperSize.width,
1891
+ paperHeight: paperSize.height,
1892
+ preferCSSPageSize: true,
1893
+ });
1894
+ }
1895
+ finally {
1896
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1897
+ }
1801
1898
  const pdfData = result && typeof result.data === 'string' ? result.data : null;
1802
1899
  if (!pdfData) {
1803
1900
  throw new BrowserError('CDP Page.printToPDF returned no data.');
@@ -1805,6 +1902,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
1805
1902
  const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
1806
1903
  let fileName = params.file_name?.trim();
1807
1904
  if (!fileName) {
1905
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1808
1906
  try {
1809
1907
  const titlePromise = typeof page.title === 'function'
1810
1908
  ? page.title()
@@ -1824,13 +1922,17 @@ You will be given a query and the markdown of a webpage that has been filtered t
1824
1922
  catch {
1825
1923
  fileName = 'page';
1826
1924
  }
1925
+ finally {
1926
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1927
+ }
1827
1928
  }
1828
1929
  if (!fileName.toLowerCase().endsWith('.pdf')) {
1829
1930
  fileName = `${fileName}.pdf`;
1830
1931
  }
1831
1932
  fileName = FileSystem.sanitize_filename(fileName);
1832
1933
  const filePath = await resolveUniqueOutputPath(fsInstance.get_dir(), fileName);
1833
- await fsp.writeFile(filePath, Buffer.from(pdfData, 'base64'));
1934
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1935
+ await writePrivateBinaryFile(filePath, Buffer.from(pdfData, 'base64'));
1834
1936
  const fileSize = (await fsp.stat(filePath)).size;
1835
1937
  const baseName = path.basename(filePath);
1836
1938
  const msg = `Saved page as PDF: ${baseName} (${fileSize.toLocaleString()} bytes)`;
@@ -1849,32 +1951,39 @@ You will be given a query and the markdown of a webpage that has been filtered t
1849
1951
  throw new BrowserError('No active page available for evaluate.');
1850
1952
  }
1851
1953
  const validatedCode = validateAndFixJavaScript(params.code);
1852
- const payload = (await page.evaluate(async ({ code }) => {
1853
- try {
1854
- const raw = await Promise.resolve((0, eval)(code));
1855
- let serializedResult;
1856
- if (raw === undefined) {
1857
- serializedResult = null;
1858
- }
1859
- else {
1860
- try {
1861
- serializedResult = JSON.parse(JSON.stringify(raw));
1954
+ let payload = null;
1955
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1956
+ try {
1957
+ payload = (await page.evaluate(async ({ code }) => {
1958
+ try {
1959
+ const raw = await Promise.resolve((0, eval)(code));
1960
+ let serializedResult;
1961
+ if (raw === undefined) {
1962
+ serializedResult = null;
1862
1963
  }
1863
- catch {
1864
- serializedResult = String(raw);
1964
+ else {
1965
+ try {
1966
+ serializedResult = JSON.parse(JSON.stringify(raw));
1967
+ }
1968
+ catch {
1969
+ serializedResult = String(raw);
1970
+ }
1865
1971
  }
1972
+ return { ok: true, result: serializedResult };
1866
1973
  }
1867
- return { ok: true, result: serializedResult };
1868
- }
1869
- catch (error) {
1870
- return {
1871
- ok: false,
1872
- error: error instanceof Error
1873
- ? error.message
1874
- : String(error ?? 'Unknown evaluate error'),
1875
- };
1876
- }
1877
- }, { code: validatedCode }));
1974
+ catch (error) {
1975
+ return {
1976
+ ok: false,
1977
+ error: error instanceof Error
1978
+ ? error.message
1979
+ : String(error ?? 'Unknown evaluate error'),
1980
+ };
1981
+ }
1982
+ }, { code: validatedCode }));
1983
+ }
1984
+ finally {
1985
+ await validateBrowserPageAfterAction(browser_session, page, signal);
1986
+ }
1878
1987
  if (!payload) {
1879
1988
  return new ActionResult({ error: 'evaluate returned no result' });
1880
1989
  }
@@ -1929,6 +2038,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
1929
2038
  if (!keyboard) {
1930
2039
  throw new BrowserError('Keyboard input is not available on the current page.');
1931
2040
  }
2041
+ await validateBrowserPageAfterAction(browser_session, page);
1932
2042
  try {
1933
2043
  await keyboard.press(params.keys);
1934
2044
  }
@@ -1943,6 +2053,9 @@ You will be given a query and the markdown of a webpage that has been filtered t
1943
2053
  throw error;
1944
2054
  }
1945
2055
  }
2056
+ finally {
2057
+ await validateBrowserPageAfterAction(browser_session, page);
2058
+ }
1946
2059
  return null;
1947
2060
  });
1948
2061
  const msg = `⌨️ Sent keys: ${params.keys}`;
@@ -1996,31 +2109,38 @@ You will be given a query and the markdown of a webpage that has been filtered t
1996
2109
  if (!domElement.xpath) {
1997
2110
  throw new BrowserError('DOM element does not include an XPath selector.');
1998
2111
  }
1999
- const payload = await page.evaluate(({ xpath }) => {
2000
- const element = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
2001
- if (!element)
2112
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2113
+ let payload = null;
2114
+ try {
2115
+ payload = await page.evaluate(({ xpath }) => {
2116
+ const element = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
2117
+ if (!element)
2118
+ return null;
2119
+ if (element.tagName?.toLowerCase() === 'select') {
2120
+ const options = Array.from(element.options).map((opt, index) => ({
2121
+ text: opt.textContent?.trim() ?? '',
2122
+ value: (opt.value ?? '').trim(),
2123
+ index,
2124
+ }));
2125
+ return { type: 'select', options };
2126
+ }
2127
+ const ariaRoles = new Set(['menu', 'listbox', 'combobox']);
2128
+ const role = element.getAttribute('role');
2129
+ if (role && ariaRoles.has(role)) {
2130
+ const nodes = element.querySelectorAll('[role="menuitem"],[role="option"]');
2131
+ const options = Array.from(nodes).map((node, index) => ({
2132
+ text: node.textContent?.trim() ?? '',
2133
+ value: node.textContent?.trim() ?? '',
2134
+ index,
2135
+ }));
2136
+ return { type: 'aria', options };
2137
+ }
2002
2138
  return null;
2003
- if (element.tagName?.toLowerCase() === 'select') {
2004
- const options = Array.from(element.options).map((opt, index) => ({
2005
- text: opt.textContent?.trim() ?? '',
2006
- value: (opt.value ?? '').trim(),
2007
- index,
2008
- }));
2009
- return { type: 'select', options };
2010
- }
2011
- const ariaRoles = new Set(['menu', 'listbox', 'combobox']);
2012
- const role = element.getAttribute('role');
2013
- if (role && ariaRoles.has(role)) {
2014
- const nodes = element.querySelectorAll('[role="menuitem"],[role="option"]');
2015
- const options = Array.from(nodes).map((node, index) => ({
2016
- text: node.textContent?.trim() ?? '',
2017
- value: node.textContent?.trim() ?? '',
2018
- index,
2019
- }));
2020
- return { type: 'aria', options };
2021
- }
2022
- return null;
2023
- }, { xpath: domElement.xpath });
2139
+ }, { xpath: domElement.xpath });
2140
+ }
2141
+ finally {
2142
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2143
+ }
2024
2144
  if (!payload || !payload.options?.length) {
2025
2145
  throw new BrowserError('No options found for the specified dropdown.');
2026
2146
  }
@@ -2080,6 +2200,7 @@ You will be given a query and the markdown of a webpage that has been filtered t
2080
2200
  if (!page) {
2081
2201
  throw new BrowserError('No active page for selection.');
2082
2202
  }
2203
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2083
2204
  for (const frame of page.frames ?? []) {
2084
2205
  try {
2085
2206
  const typeInfo = await frame.evaluate((xpath) => {
@@ -2209,6 +2330,9 @@ You will be given a query and the markdown of a webpage that has been filtered t
2209
2330
  }
2210
2331
  continue;
2211
2332
  }
2333
+ finally {
2334
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2335
+ }
2212
2336
  }
2213
2337
  throw new BrowserError(`Could not select option '${params.text}' for index ${params.index}`);
2214
2338
  });
@@ -2228,17 +2352,23 @@ You will be given a query and the markdown of a webpage that has been filtered t
2228
2352
  throw new Error('Browser session missing');
2229
2353
  throwIfAborted(signal);
2230
2354
  const page = await browser_session.get_current_page();
2231
- await page?.keyboard?.press('Enter');
2232
- await page?.keyboard?.press('Escape');
2233
- await page?.keyboard?.press('ControlOrMeta+A');
2234
- await page?.keyboard?.press('ControlOrMeta+C');
2235
- const content = await page?.evaluate?.(() => navigator.clipboard.readText());
2236
- return new ActionResult({
2237
- extracted_content: content ?? '',
2238
- include_in_memory: true,
2239
- long_term_memory: 'Retrieved sheet contents',
2240
- include_extracted_content_only_once: true,
2241
- });
2355
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2356
+ try {
2357
+ await page?.keyboard?.press('Enter');
2358
+ await page?.keyboard?.press('Escape');
2359
+ await page?.keyboard?.press('ControlOrMeta+A');
2360
+ await page?.keyboard?.press('ControlOrMeta+C');
2361
+ const content = await page?.evaluate?.(() => navigator.clipboard.readText());
2362
+ return new ActionResult({
2363
+ extracted_content: content ?? '',
2364
+ include_in_memory: true,
2365
+ long_term_memory: 'Retrieved sheet contents',
2366
+ include_extracted_content_only_once: true,
2367
+ });
2368
+ }
2369
+ finally {
2370
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2371
+ }
2242
2372
  });
2243
2373
  this.registry.action('Google Sheets: Get the contents of a cell or range of cells', {
2244
2374
  domains: ['https://docs.google.com'],
@@ -2248,16 +2378,22 @@ You will be given a query and the markdown of a webpage that has been filtered t
2248
2378
  throw new Error('Browser session missing');
2249
2379
  throwIfAborted(signal);
2250
2380
  const page = await browser_session.get_current_page();
2251
- await gotoSheetsRange(page, params.cell_or_range, signal);
2252
- await page?.keyboard?.press('ControlOrMeta+C');
2253
- await waitWithSignal(100, signal);
2254
- const content = await page?.evaluate?.(() => navigator.clipboard.readText());
2255
- return new ActionResult({
2256
- extracted_content: content ?? '',
2257
- include_in_memory: true,
2258
- long_term_memory: `Retrieved contents from ${params.cell_or_range}`,
2259
- include_extracted_content_only_once: true,
2260
- });
2381
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2382
+ try {
2383
+ await gotoSheetsRange(page, params.cell_or_range, signal);
2384
+ await page?.keyboard?.press('ControlOrMeta+C');
2385
+ await waitWithSignal(100, signal);
2386
+ const content = await page?.evaluate?.(() => navigator.clipboard.readText());
2387
+ return new ActionResult({
2388
+ extracted_content: content ?? '',
2389
+ include_in_memory: true,
2390
+ long_term_memory: `Retrieved contents from ${params.cell_or_range}`,
2391
+ include_extracted_content_only_once: true,
2392
+ });
2393
+ }
2394
+ finally {
2395
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2396
+ }
2261
2397
  });
2262
2398
  this.registry.action('Google Sheets: Update the content of a cell or range of cells', {
2263
2399
  domains: ['https://docs.google.com'],
@@ -2267,16 +2403,22 @@ You will be given a query and the markdown of a webpage that has been filtered t
2267
2403
  throw new Error('Browser session missing');
2268
2404
  throwIfAborted(signal);
2269
2405
  const page = await browser_session.get_current_page();
2270
- await gotoSheetsRange(page, params.cell_or_range, signal);
2271
- await page?.evaluate?.((value) => {
2272
- const clipboardData = new DataTransfer();
2273
- clipboardData.setData('text/plain', value);
2274
- document.activeElement?.dispatchEvent(new ClipboardEvent('paste', { clipboardData }));
2275
- }, params.value);
2276
- return new ActionResult({
2277
- extracted_content: `Updated cells: ${params.cell_or_range} = ${params.value}`,
2278
- long_term_memory: `Updated cells ${params.cell_or_range} with ${params.value}`,
2279
- });
2406
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2407
+ try {
2408
+ await gotoSheetsRange(page, params.cell_or_range, signal);
2409
+ await page?.evaluate?.((value) => {
2410
+ const clipboardData = new DataTransfer();
2411
+ clipboardData.setData('text/plain', value);
2412
+ document.activeElement?.dispatchEvent(new ClipboardEvent('paste', { clipboardData }));
2413
+ }, params.value);
2414
+ return new ActionResult({
2415
+ extracted_content: `Updated cells: ${params.cell_or_range} = ${params.value}`,
2416
+ long_term_memory: `Updated cells ${params.cell_or_range} with ${params.value}`,
2417
+ });
2418
+ }
2419
+ finally {
2420
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2421
+ }
2280
2422
  });
2281
2423
  this.registry.action('Google Sheets: Clear whatever cells are currently selected', {
2282
2424
  domains: ['https://docs.google.com'],
@@ -2286,12 +2428,18 @@ You will be given a query and the markdown of a webpage that has been filtered t
2286
2428
  throw new Error('Browser session missing');
2287
2429
  throwIfAborted(signal);
2288
2430
  const page = await browser_session.get_current_page();
2289
- await gotoSheetsRange(page, params.cell_or_range, signal);
2290
- await page?.keyboard?.press('Backspace');
2291
- return new ActionResult({
2292
- extracted_content: `Cleared cells: ${params.cell_or_range}`,
2293
- long_term_memory: `Cleared cells ${params.cell_or_range}`,
2294
- });
2431
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2432
+ try {
2433
+ await gotoSheetsRange(page, params.cell_or_range, signal);
2434
+ await page?.keyboard?.press('Backspace');
2435
+ return new ActionResult({
2436
+ extracted_content: `Cleared cells: ${params.cell_or_range}`,
2437
+ long_term_memory: `Cleared cells ${params.cell_or_range}`,
2438
+ });
2439
+ }
2440
+ finally {
2441
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2442
+ }
2295
2443
  });
2296
2444
  this.registry.action('Google Sheets: Select a specific cell or range of cells', {
2297
2445
  domains: ['https://docs.google.com'],
@@ -2301,11 +2449,17 @@ You will be given a query and the markdown of a webpage that has been filtered t
2301
2449
  throw new Error('Browser session missing');
2302
2450
  throwIfAborted(signal);
2303
2451
  const page = await browser_session.get_current_page();
2304
- await gotoSheetsRange(page, params.cell_or_range, signal);
2305
- return new ActionResult({
2306
- extracted_content: `Selected cells: ${params.cell_or_range}`,
2307
- long_term_memory: `Selected cells ${params.cell_or_range}`,
2308
- });
2452
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2453
+ try {
2454
+ await gotoSheetsRange(page, params.cell_or_range, signal);
2455
+ return new ActionResult({
2456
+ extracted_content: `Selected cells: ${params.cell_or_range}`,
2457
+ long_term_memory: `Selected cells ${params.cell_or_range}`,
2458
+ });
2459
+ }
2460
+ finally {
2461
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2462
+ }
2309
2463
  });
2310
2464
  this.registry.action('Google Sheets: Fallback method to type text into the currently selected cell', {
2311
2465
  domains: ['https://docs.google.com'],
@@ -2315,13 +2469,19 @@ You will be given a query and the markdown of a webpage that has been filtered t
2315
2469
  throw new Error('Browser session missing');
2316
2470
  throwIfAborted(signal);
2317
2471
  const page = await browser_session.get_current_page();
2318
- await page?.keyboard?.type(params.text, { delay: 100 });
2319
- await page?.keyboard?.press('Enter');
2320
- await page?.keyboard?.press('ArrowUp');
2321
- return new ActionResult({
2322
- extracted_content: `Inputted text ${params.text}`,
2323
- long_term_memory: `Inputted text '${params.text}' into cell`,
2324
- });
2472
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2473
+ try {
2474
+ await page?.keyboard?.type(params.text, { delay: 100 });
2475
+ await page?.keyboard?.press('Enter');
2476
+ await page?.keyboard?.press('ArrowUp');
2477
+ return new ActionResult({
2478
+ extracted_content: `Inputted text ${params.text}`,
2479
+ long_term_memory: `Inputted text '${params.text}' into cell`,
2480
+ });
2481
+ }
2482
+ finally {
2483
+ await validateBrowserPageAfterAction(browser_session, page, signal);
2484
+ }
2325
2485
  });
2326
2486
  }
2327
2487
  async gotoSheetsRange(page, cell_or_range, signal = null) {
@@ -2428,19 +2588,19 @@ You will be given a query and the markdown of a webpage that has been filtered t
2428
2588
  action(description, options = {}) {
2429
2589
  return this.registry.action(description, options);
2430
2590
  }
2431
- async act(action, { browser_session, page_extraction_llm = null, sensitive_data = null, available_file_paths = null, file_system = null, context = null, signal = null, }) {
2591
+ async act(action, { browser_session, page_extraction_llm = null, sensitive_data = null, available_file_paths = null, file_system = null, context = null, signal = null, action_timeout = null, }) {
2432
2592
  const entries = toActionEntries(action);
2433
2593
  for (const [actionName, params] of entries) {
2434
2594
  try {
2435
- const result = await this.registry.execute_action(actionName, params, {
2595
+ const result = await runActionWithTimeout(actionName, action_timeout, signal, (actionSignal) => this.registry.execute_action(actionName, params, {
2436
2596
  browser_session,
2437
2597
  page_extraction_llm,
2438
2598
  sensitive_data,
2439
2599
  available_file_paths,
2440
2600
  file_system,
2441
2601
  context,
2442
- signal,
2443
- });
2602
+ signal: actionSignal,
2603
+ }));
2444
2604
  if (typeof result === 'string') {
2445
2605
  return new ActionResult({ extracted_content: result });
2446
2606
  }
@@ -2471,6 +2631,11 @@ You will be given a query and the markdown of a webpage that has been filtered t
2471
2631
  }
2472
2632
  throw error;
2473
2633
  }
2634
+ if (isActionTimeoutError(error)) {
2635
+ return new ActionResult({
2636
+ error: error.message,
2637
+ });
2638
+ }
2474
2639
  const message = String(error?.message ?? error ?? '');
2475
2640
  if (error instanceof Error &&
2476
2641
  message === `Error executing action ${actionName} due to timeout.`) {