browser-use 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/dist/agent/service.js +2 -0
  2. package/dist/agent/system_prompt.md +269 -0
  3. package/dist/agent/system_prompt_anthropic_flash.md +240 -0
  4. package/dist/agent/system_prompt_browser_use.md +18 -0
  5. package/dist/agent/system_prompt_browser_use_flash.md +15 -0
  6. package/dist/agent/system_prompt_browser_use_no_thinking.md +17 -0
  7. package/dist/agent/system_prompt_flash.md +16 -0
  8. package/dist/agent/system_prompt_flash_anthropic.md +30 -0
  9. package/dist/agent/system_prompt_no_thinking.md +245 -0
  10. package/dist/browser/cloud/index.d.ts +1 -0
  11. package/dist/browser/cloud/index.js +1 -0
  12. package/dist/browser/cloud/management.d.ts +130 -0
  13. package/dist/browser/cloud/management.js +140 -0
  14. package/dist/browser/events.d.ts +61 -3
  15. package/dist/browser/events.js +66 -0
  16. package/dist/browser/profile.d.ts +1 -0
  17. package/dist/browser/profile.js +1 -0
  18. package/dist/browser/session.d.ts +56 -2
  19. package/dist/browser/session.js +596 -24
  20. package/dist/browser/watchdogs/base.js +34 -1
  21. package/dist/browser/watchdogs/captcha-watchdog.d.ts +26 -0
  22. package/dist/browser/watchdogs/captcha-watchdog.js +151 -0
  23. package/dist/browser/watchdogs/index.d.ts +1 -0
  24. package/dist/browser/watchdogs/index.js +1 -0
  25. package/dist/browser/watchdogs/screenshot-watchdog.js +4 -3
  26. package/dist/cli.d.ts +120 -0
  27. package/dist/cli.js +1816 -4
  28. package/dist/controller/service.js +106 -362
  29. package/dist/controller/views.d.ts +9 -6
  30. package/dist/controller/views.js +8 -5
  31. package/dist/filesystem/file-system.js +1 -1
  32. package/dist/llm/litellm/chat.d.ts +11 -0
  33. package/dist/llm/litellm/chat.js +16 -0
  34. package/dist/llm/litellm/index.d.ts +1 -0
  35. package/dist/llm/litellm/index.js +1 -0
  36. package/dist/llm/models.js +29 -3
  37. package/dist/llm/oci-raw/chat.d.ts +64 -0
  38. package/dist/llm/oci-raw/chat.js +350 -0
  39. package/dist/llm/oci-raw/index.d.ts +2 -0
  40. package/dist/llm/oci-raw/index.js +2 -0
  41. package/dist/llm/oci-raw/serializer.d.ts +12 -0
  42. package/dist/llm/oci-raw/serializer.js +128 -0
  43. package/dist/mcp/server.d.ts +1 -0
  44. package/dist/mcp/server.js +62 -13
  45. package/dist/skill-cli/direct.d.ts +100 -0
  46. package/dist/skill-cli/direct.js +984 -0
  47. package/dist/skill-cli/index.d.ts +2 -0
  48. package/dist/skill-cli/index.js +2 -0
  49. package/dist/skill-cli/server.d.ts +2 -0
  50. package/dist/skill-cli/server.js +472 -11
  51. package/dist/skill-cli/tunnel.d.ts +61 -0
  52. package/dist/skill-cli/tunnel.js +257 -0
  53. package/dist/sync/auth.d.ts +8 -0
  54. package/dist/sync/auth.js +12 -0
  55. package/package.json +22 -4
@@ -2,7 +2,7 @@ import fs from 'node:fs';
2
2
  import os from 'node:os';
3
3
  import path from 'node:path';
4
4
  import { isIP } from 'node:net';
5
- import { execFile } from 'node:child_process';
5
+ import { execFile, execFileSync, } from 'node:child_process';
6
6
  import { promisify } from 'node:util';
7
7
  import { createLogger } from '../logging-config.js';
8
8
  import { match_url_with_domain_pattern, uuid7str } from '../utils.js';
@@ -10,13 +10,14 @@ import { EventBus, } from '../event-bus.js';
10
10
  import { async_playwright, } from './types.js';
11
11
  import { BrowserProfile, CHROME_DOCKER_ARGS, DEFAULT_BROWSER_PROFILE, } from './profile.js';
12
12
  import { BrowserStateSummary, BrowserError, URLNotAllowedError, } from './views.js';
13
- import { AgentFocusChangedEvent, BrowserConnectedEvent, BrowserLaunchEvent, BrowserStartEvent, BrowserStoppedEvent, BrowserStopEvent, DialogOpenedEvent, DownloadProgressEvent, DownloadStartedEvent, FileDownloadedEvent, TabClosedEvent, TabCreatedEvent, } from './events.js';
13
+ import { AgentFocusChangedEvent, BrowserConnectedEvent, BrowserErrorEvent, BrowserLaunchEvent, BrowserReconnectedEvent, BrowserReconnectingEvent, BrowserStartEvent, BrowserStoppedEvent, BrowserStopEvent, DialogOpenedEvent, DownloadProgressEvent, DownloadStartedEvent, FileDownloadedEvent, TabClosedEvent, TabCreatedEvent, } from './events.js';
14
14
  import { DOMElementNode, DOMState } from '../dom/views.js';
15
15
  import { normalize_url } from './utils.js';
16
16
  import { DomService } from '../dom/service.js';
17
17
  import { showDVDScreensaver, showSpinner, withDVDScreensaver, } from './dvd-screensaver.js';
18
18
  import { SessionManager } from './session-manager.js';
19
19
  import { AboutBlankWatchdog } from './watchdogs/aboutblank-watchdog.js';
20
+ import { CaptchaWatchdog, } from './watchdogs/captcha-watchdog.js';
20
21
  import { CDPSessionWatchdog } from './watchdogs/cdp-session-watchdog.js';
21
22
  import { CrashWatchdog } from './watchdogs/crash-watchdog.js';
22
23
  import { DefaultActionWatchdog } from './watchdogs/default-action-watchdog.js';
@@ -31,6 +32,147 @@ import { ScreenshotWatchdog } from './watchdogs/screenshot-watchdog.js';
31
32
  import { SecurityWatchdog } from './watchdogs/security-watchdog.js';
32
33
  import { StorageStateWatchdog } from './watchdogs/storage-state-watchdog.js';
33
34
  const execFileAsync = promisify(execFile);
35
+ const PLAYWRIGHT_OPTION_KEY_OVERRIDES = {
36
+ extra_http_headers: 'extraHTTPHeaders',
37
+ };
38
+ const EMPTY_DOM_RETRY_DELAY_MS = 250;
39
+ const REMOTE_RECONNECT_DELAYS_MS = [1000, 2000, 4000];
40
+ const REMOTE_RECONNECT_ATTEMPT_TIMEOUT_MS = 15_000;
41
+ const cloneBrowserProfileConfig = (profile) => typeof structuredClone === 'function'
42
+ ? structuredClone(profile.config)
43
+ : JSON.parse(JSON.stringify(profile.config));
44
+ const detectSystemChromeVariant = (executablePath) => {
45
+ const normalizedPath = String(executablePath ?? '')
46
+ .trim()
47
+ .toLowerCase();
48
+ if (!normalizedPath) {
49
+ return 'chrome';
50
+ }
51
+ if (normalizedPath.includes('chromium')) {
52
+ return 'chromium';
53
+ }
54
+ if (normalizedPath.includes('chrome canary') ||
55
+ normalizedPath.includes('chrome sxs')) {
56
+ return 'chrome-canary';
57
+ }
58
+ if (normalizedPath.includes('google-chrome-beta')) {
59
+ return 'chrome-beta';
60
+ }
61
+ if (normalizedPath.includes('google-chrome-unstable')) {
62
+ return 'chrome-unstable';
63
+ }
64
+ return 'chrome';
65
+ };
66
+ export const systemChrome = {
67
+ findExecutable() {
68
+ if (process.platform === 'darwin') {
69
+ const candidates = [
70
+ '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
71
+ '/Applications/Chromium.app/Contents/MacOS/Chromium',
72
+ '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
73
+ ];
74
+ return candidates.find((candidate) => fs.existsSync(candidate)) ?? null;
75
+ }
76
+ if (process.platform === 'linux') {
77
+ const commands = [
78
+ 'google-chrome',
79
+ 'google-chrome-stable',
80
+ 'google-chrome-beta',
81
+ 'google-chrome-unstable',
82
+ 'chromium',
83
+ 'chromium-browser',
84
+ ];
85
+ for (const command of commands) {
86
+ try {
87
+ const resolved = execFileSync('which', [command], {
88
+ encoding: 'utf8',
89
+ stdio: ['ignore', 'pipe', 'ignore'],
90
+ }).trim();
91
+ if (resolved) {
92
+ return resolved;
93
+ }
94
+ }
95
+ catch {
96
+ // Ignore missing commands and try the next candidate.
97
+ }
98
+ }
99
+ return null;
100
+ }
101
+ if (process.platform === 'win32') {
102
+ const candidates = [
103
+ path.join(process.env.ProgramFiles ?? 'C:\\Program Files', 'Google', 'Chrome', 'Application', 'chrome.exe'),
104
+ path.join(process.env['ProgramFiles(x86)'] ?? 'C:\\Program Files (x86)', 'Google', 'Chrome', 'Application', 'chrome.exe'),
105
+ path.join(process.env.LOCALAPPDATA ?? '', 'Google', 'Chrome', 'Application', 'chrome.exe'),
106
+ path.join(process.env.LOCALAPPDATA ?? '', 'Google', 'Chrome SxS', 'Application', 'chrome.exe'),
107
+ path.join(process.env.LOCALAPPDATA ?? '', 'Chromium', 'Application', 'chrome.exe'),
108
+ path.join(process.env.ProgramFiles ?? 'C:\\Program Files', 'Chromium', 'Application', 'chrome.exe'),
109
+ path.join(process.env['ProgramFiles(x86)'] ?? 'C:\\Program Files (x86)', 'Chromium', 'Application', 'chrome.exe'),
110
+ ];
111
+ return candidates.find((candidate) => fs.existsSync(candidate)) ?? null;
112
+ }
113
+ return null;
114
+ },
115
+ getUserDataDir(executablePath = systemChrome.findExecutable()) {
116
+ const variant = detectSystemChromeVariant(executablePath);
117
+ if (process.platform === 'darwin') {
118
+ const applicationSupportDir = path.join(os.homedir(), 'Library', 'Application Support');
119
+ if (variant === 'chromium') {
120
+ return path.join(applicationSupportDir, 'Chromium');
121
+ }
122
+ if (variant === 'chrome-canary') {
123
+ return path.join(applicationSupportDir, 'Google', 'Chrome Canary');
124
+ }
125
+ return path.join(applicationSupportDir, 'Google', 'Chrome');
126
+ }
127
+ if (process.platform === 'linux') {
128
+ if (variant === 'chromium') {
129
+ return path.join(os.homedir(), '.config', 'chromium');
130
+ }
131
+ if (variant === 'chrome-beta') {
132
+ return path.join(os.homedir(), '.config', 'google-chrome-beta');
133
+ }
134
+ if (variant === 'chrome-unstable') {
135
+ return path.join(os.homedir(), '.config', 'google-chrome-unstable');
136
+ }
137
+ return path.join(os.homedir(), '.config', 'google-chrome');
138
+ }
139
+ if (process.platform === 'win32') {
140
+ const localAppData = process.env.LOCALAPPDATA ?? path.join(os.homedir(), 'AppData', 'Local');
141
+ if (variant === 'chromium') {
142
+ return path.join(localAppData, 'Chromium', 'User Data');
143
+ }
144
+ if (variant === 'chrome-canary') {
145
+ return path.join(localAppData, 'Google', 'Chrome SxS', 'User Data');
146
+ }
147
+ return path.join(localAppData, 'Google', 'Chrome', 'User Data');
148
+ }
149
+ return null;
150
+ },
151
+ listProfiles(userDataDir = systemChrome.getUserDataDir()) {
152
+ if (!userDataDir) {
153
+ return [];
154
+ }
155
+ const localStatePath = path.join(userDataDir, 'Local State');
156
+ if (!fs.existsSync(localStatePath)) {
157
+ return [];
158
+ }
159
+ try {
160
+ const raw = fs.readFileSync(localStatePath, 'utf8');
161
+ const localState = JSON.parse(raw);
162
+ const infoCache = localState.profile?.info_cache ?? {};
163
+ return Object.entries(infoCache)
164
+ .map(([directory, info]) => ({
165
+ directory,
166
+ name: info?.name || directory,
167
+ email: info?.user_name || '',
168
+ }))
169
+ .sort((left, right) => left.directory.localeCompare(right.directory));
170
+ }
171
+ catch {
172
+ return [];
173
+ }
174
+ },
175
+ };
34
176
  const createEmptyDomState = () => {
35
177
  const root = new DOMElementNode(true, null, 'html', '/html[1]', {}, []);
36
178
  return new DOMState(root, {});
@@ -77,11 +219,18 @@ export class BrowserSession {
77
219
  _maxRecentEvents = 100;
78
220
  _watchdogs = new Set();
79
221
  _defaultWatchdogsAttached = false;
222
+ _captchaWatchdog = null;
223
+ RECONNECT_WAIT_TIMEOUT = 54;
224
+ _reconnecting = false;
225
+ _reconnectTask = null;
226
+ _reconnectWaitPromise = Promise.resolve();
227
+ _resolveReconnectWait = null;
228
+ _intentionalStop = false;
229
+ _disconnectAwareBrowser = null;
230
+ _browserDisconnectHandler = null;
80
231
  constructor(init = {}) {
81
232
  const sourceProfileConfig = init.browser_profile
82
- ? typeof structuredClone === 'function'
83
- ? structuredClone(init.browser_profile.config)
84
- : JSON.parse(JSON.stringify(init.browser_profile.config))
233
+ ? cloneBrowserProfileConfig(init.browser_profile)
85
234
  : (init.profile ?? {});
86
235
  this.browser_profile = new BrowserProfile(sourceProfileConfig);
87
236
  this.id = init.id ?? uuid7str();
@@ -121,12 +270,56 @@ export class BrowserSession {
121
270
  this._attachDialogHandler(this.agent_current_page);
122
271
  this._recordRecentEvent('session_initialized', { url: this.currentUrl });
123
272
  }
273
+ static from_system_chrome(init = {}) {
274
+ const executablePath = systemChrome.findExecutable();
275
+ if (!executablePath) {
276
+ throw new Error('Chrome not found. Please install Chrome or use BrowserSession with an explicit executable_path.\n' +
277
+ 'Expected locations:\n' +
278
+ ' macOS: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome\n' +
279
+ ' Linux: /usr/bin/google-chrome or /usr/bin/chromium\n' +
280
+ ' Windows: C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe');
281
+ }
282
+ const userDataDir = systemChrome.getUserDataDir(executablePath);
283
+ if (!userDataDir) {
284
+ throw new Error('Could not detect Chrome profile directory for your platform.\n' +
285
+ 'Expected locations:\n' +
286
+ ' macOS: ~/Library/Application Support/Google/Chrome\n' +
287
+ ' Linux: ~/.config/google-chrome\n' +
288
+ ' Windows: %LocalAppData%\\Google\\Chrome\\User Data');
289
+ }
290
+ const availableProfiles = systemChrome.listProfiles(userDataDir);
291
+ const selectedProfileDirectory = init.profile_directory ?? availableProfiles[0]?.directory ?? 'Default';
292
+ if (typeof init.profile_directory === 'undefined' && availableProfiles[0]) {
293
+ createLogger('browser_use.browser.session').info(`Auto-selected Chrome profile: ${availableProfiles[0].name} (${availableProfiles[0].directory})`);
294
+ }
295
+ const sourceProfileConfig = init.browser_profile
296
+ ? cloneBrowserProfileConfig(init.browser_profile)
297
+ : (init.profile ?? {});
298
+ const { browser_profile: _browserProfile, profile: _profile, profile_directory: _profileDirectory, ...sessionInit } = init;
299
+ return new BrowserSession({
300
+ ...sessionInit,
301
+ browser_profile: new BrowserProfile({
302
+ ...sourceProfileConfig,
303
+ executable_path: executablePath,
304
+ user_data_dir: userDataDir,
305
+ profile_directory: selectedProfileDirectory,
306
+ }),
307
+ });
308
+ }
309
+ static list_chrome_profiles() {
310
+ const executablePath = systemChrome.findExecutable();
311
+ const userDataDir = systemChrome.getUserDataDir(executablePath);
312
+ return systemChrome.listProfiles(userDataDir);
313
+ }
124
314
  attach_watchdog(watchdog) {
125
315
  if (this._watchdogs.has(watchdog)) {
126
316
  return;
127
317
  }
128
318
  watchdog.attach_to_session();
129
319
  this._watchdogs.add(watchdog);
320
+ if (watchdog instanceof CaptchaWatchdog) {
321
+ this._captchaWatchdog = watchdog;
322
+ }
130
323
  }
131
324
  attach_watchdogs(watchdogs) {
132
325
  for (const watchdog of watchdogs) {
@@ -139,12 +332,16 @@ export class BrowserSession {
139
332
  }
140
333
  watchdog.detach_from_session();
141
334
  this._watchdogs.delete(watchdog);
335
+ if (watchdog === this._captchaWatchdog) {
336
+ this._captchaWatchdog = null;
337
+ }
142
338
  }
143
339
  detach_all_watchdogs() {
144
340
  for (const watchdog of [...this._watchdogs]) {
145
341
  this.detach_watchdog(watchdog);
146
342
  }
147
343
  this._defaultWatchdogsAttached = false;
344
+ this._captchaWatchdog = null;
148
345
  }
149
346
  get_watchdogs() {
150
347
  return [...this._watchdogs];
@@ -179,6 +376,10 @@ export class BrowserSession {
179
376
  new StorageStateWatchdog({ browser_session: this }),
180
377
  new DefaultActionWatchdog({ browser_session: this }),
181
378
  ];
379
+ if (this.browser_profile.config.captcha_solver) {
380
+ this._captchaWatchdog = new CaptchaWatchdog({ browser_session: this });
381
+ watchdogs.push(this._captchaWatchdog);
382
+ }
182
383
  const configuredHarPath = this.browser_profile.config.record_har_path;
183
384
  if (typeof configuredHarPath === 'string' &&
184
385
  configuredHarPath.trim().length > 0) {
@@ -187,6 +388,9 @@ export class BrowserSession {
187
388
  this.attach_watchdogs(watchdogs);
188
389
  this._defaultWatchdogsAttached = true;
189
390
  }
391
+ async wait_if_captcha_solving(timeoutSeconds) {
392
+ return (this._captchaWatchdog?.wait_if_captcha_solving(timeoutSeconds) ?? null);
393
+ }
190
394
  _formatTabId(pageId) {
191
395
  const normalized = Number.isFinite(pageId) && pageId >= 0 ? Math.floor(pageId) : 0;
192
396
  return String(normalized).padStart(4, '0').slice(-4);
@@ -492,7 +696,7 @@ export class BrowserSession {
492
696
  this.tabPages = nextTabPages;
493
697
  const activePage = this.agent_current_page && pages.includes(this.agent_current_page)
494
698
  ? this.agent_current_page
495
- : pages[0] ?? null;
699
+ : (pages[0] ?? null);
496
700
  if (activePage) {
497
701
  const activeIndex = this._tabs.findIndex((tab) => this.tabPages.get(tab.page_id) === activePage);
498
702
  if (activeIndex !== -1) {
@@ -507,7 +711,8 @@ export class BrowserSession {
507
711
  this.currentUrl = activeTab.url;
508
712
  this.currentTitle = activeTab.title || activeTab.url;
509
713
  this.agent_current_page = this.tabPages.get(activeTab.page_id) ?? null;
510
- this.human_current_page = this.human_current_page ?? this.agent_current_page;
714
+ this.human_current_page =
715
+ this.human_current_page ?? this.agent_current_page;
511
716
  }
512
717
  this._syncSessionManagerFromTabs();
513
718
  }
@@ -711,6 +916,65 @@ export class BrowserSession {
711
916
  get is_stopping() {
712
917
  return this._stoppingPromise !== null;
713
918
  }
919
+ get is_reconnecting() {
920
+ return this._reconnecting;
921
+ }
922
+ get should_gate_watchdog_events() {
923
+ return Boolean(this.initialized ||
924
+ this.browser ||
925
+ this.browser_context ||
926
+ this.cdp_url ||
927
+ this.wss_url ||
928
+ this._reconnecting);
929
+ }
930
+ get is_cdp_connected() {
931
+ try {
932
+ if (this.browser) {
933
+ const browser = this.browser;
934
+ if (typeof browser.isConnected === 'function' &&
935
+ !browser.isConnected()) {
936
+ return false;
937
+ }
938
+ }
939
+ if (this.browser_context) {
940
+ const contextBrowser = this.browser_context.browser?.();
941
+ if (contextBrowser &&
942
+ typeof contextBrowser.isConnected === 'function' &&
943
+ !contextBrowser.isConnected()) {
944
+ return false;
945
+ }
946
+ return true;
947
+ }
948
+ return Boolean(this.browser);
949
+ }
950
+ catch {
951
+ return false;
952
+ }
953
+ }
954
+ async wait_for_reconnect(timeoutSeconds = this.RECONNECT_WAIT_TIMEOUT) {
955
+ if (!this._reconnecting) {
956
+ return;
957
+ }
958
+ const timeoutMs = Number.isFinite(timeoutSeconds) && timeoutSeconds > 0
959
+ ? timeoutSeconds * 1000
960
+ : this.RECONNECT_WAIT_TIMEOUT * 1000;
961
+ let timeoutHandle = null;
962
+ try {
963
+ await Promise.race([
964
+ this._reconnectWaitPromise,
965
+ new Promise((_, reject) => {
966
+ timeoutHandle = setTimeout(() => {
967
+ reject(new Error(`Reconnection wait timed out after ${Math.round(timeoutMs / 1000)}s`));
968
+ }, timeoutMs);
969
+ }),
970
+ ]);
971
+ }
972
+ finally {
973
+ if (timeoutHandle) {
974
+ clearTimeout(timeoutHandle);
975
+ }
976
+ }
977
+ }
714
978
  claim_agent(agentId, mode = 'exclusive') {
715
979
  if (!agentId) {
716
980
  return false;
@@ -896,11 +1160,276 @@ export class BrowserSession {
896
1160
  if (convertedValue === undefined) {
897
1161
  continue;
898
1162
  }
899
- const normalizedKey = rawKey.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
1163
+ const normalizedKey = PLAYWRIGHT_OPTION_KEY_OVERRIDES[rawKey] ??
1164
+ rawKey.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
900
1165
  result[normalizedKey] = convertedValue;
901
1166
  }
902
1167
  return result;
903
1168
  }
1169
+ async set_extra_headers(headers) {
1170
+ const normalizedHeaders = Object.fromEntries(Object.entries(headers)
1171
+ .map(([key, value]) => [String(key).trim(), String(value)])
1172
+ .filter(([key]) => key.length > 0));
1173
+ if (!this.browser_context ||
1174
+ Object.keys(normalizedHeaders).length === 0 ||
1175
+ typeof this.browser_context.setExtraHTTPHeaders !== 'function') {
1176
+ return;
1177
+ }
1178
+ await this.browser_context.setExtraHTTPHeaders(normalizedHeaders);
1179
+ }
1180
+ async _applyConfiguredExtraHttpHeaders() {
1181
+ const configuredHeaders = this.browser_profile.config.extra_http_headers;
1182
+ if (!configuredHeaders || Object.keys(configuredHeaders).length === 0) {
1183
+ return;
1184
+ }
1185
+ await this.set_extra_headers(configuredHeaders);
1186
+ }
1187
+ _usesRemoteBrowserConnection() {
1188
+ return Boolean(this.cdp_url || this.wss_url);
1189
+ }
1190
+ async _connectToConfiguredBrowser(playwright) {
1191
+ const connectOptions = this._toPlaywrightOptions(this.browser_profile.kwargs_for_connect());
1192
+ if (this.cdp_url) {
1193
+ return await playwright.chromium.connectOverCDP(this.cdp_url, connectOptions ?? {});
1194
+ }
1195
+ if (this.wss_url) {
1196
+ return await playwright.chromium.connect(this.wss_url, connectOptions ?? {});
1197
+ }
1198
+ throw new Error('Cannot connect to a remote browser without cdp_url or wss_url');
1199
+ }
1200
+ async _ensureBrowserContextFromBrowser(browser) {
1201
+ const existingContexts = (typeof browser?.contexts === 'function' ? browser.contexts() : []) ?? [];
1202
+ if (existingContexts.length > 0) {
1203
+ return existingContexts[0] ?? null;
1204
+ }
1205
+ if (typeof browser?.newContext === 'function') {
1206
+ const contextOptions = this._toPlaywrightOptions(this.browser_profile.kwargs_for_new_context());
1207
+ return await browser.newContext(contextOptions ?? {});
1208
+ }
1209
+ return null;
1210
+ }
1211
+ _beginReconnectWait() {
1212
+ this._reconnectWaitPromise = new Promise((resolve) => {
1213
+ this._resolveReconnectWait = resolve;
1214
+ });
1215
+ }
1216
+ _endReconnectWait() {
1217
+ this._resolveReconnectWait?.();
1218
+ this._resolveReconnectWait = null;
1219
+ this._reconnectWaitPromise = Promise.resolve();
1220
+ }
1221
+ _detachRemoteDisconnectHandler() {
1222
+ if (!this._disconnectAwareBrowser || !this._browserDisconnectHandler) {
1223
+ this._disconnectAwareBrowser = null;
1224
+ this._browserDisconnectHandler = null;
1225
+ return;
1226
+ }
1227
+ if (typeof this._disconnectAwareBrowser.off === 'function') {
1228
+ this._disconnectAwareBrowser.off('disconnected', this._browserDisconnectHandler);
1229
+ }
1230
+ else if (typeof this._disconnectAwareBrowser.removeListener === 'function') {
1231
+ this._disconnectAwareBrowser.removeListener('disconnected', this._browserDisconnectHandler);
1232
+ }
1233
+ this._disconnectAwareBrowser = null;
1234
+ this._browserDisconnectHandler = null;
1235
+ }
1236
+ _attachRemoteDisconnectHandler(browser) {
1237
+ this._detachRemoteDisconnectHandler();
1238
+ if (!this._usesRemoteBrowserConnection()) {
1239
+ return;
1240
+ }
1241
+ const browserWithEvents = browser;
1242
+ if (!browserWithEvents || typeof browserWithEvents.on !== 'function') {
1243
+ return;
1244
+ }
1245
+ const onDisconnected = () => {
1246
+ this._handleUnexpectedRemoteDisconnect();
1247
+ };
1248
+ browserWithEvents.on('disconnected', onDisconnected);
1249
+ this._disconnectAwareBrowser = browserWithEvents;
1250
+ this._browserDisconnectHandler = onDisconnected;
1251
+ }
1252
+ _handleUnexpectedRemoteDisconnect() {
1253
+ if (this._intentionalStop ||
1254
+ this._reconnecting ||
1255
+ !this._usesRemoteBrowserConnection()) {
1256
+ return;
1257
+ }
1258
+ this.logger.warning('Remote browser connection closed unexpectedly; attempting to reconnect');
1259
+ this._recordRecentEvent('browser_disconnected', {
1260
+ url: this.currentUrl,
1261
+ });
1262
+ const reconnectTask = this._auto_reconnect();
1263
+ this._reconnectTask = reconnectTask;
1264
+ void reconnectTask.finally(() => {
1265
+ if (this._reconnectTask === reconnectTask) {
1266
+ this._reconnectTask = null;
1267
+ }
1268
+ });
1269
+ }
1270
+ async _restorePagesAfterReconnect(preferredUrl, preferredTabIndex) {
1271
+ if (!this.browser_context) {
1272
+ this.agent_current_page = null;
1273
+ this.human_current_page = null;
1274
+ return;
1275
+ }
1276
+ let pages = this.browser_context.pages?.() ?? [];
1277
+ if (!pages.length && typeof this.browser_context.newPage === 'function') {
1278
+ const createdPage = await this.browser_context.newPage();
1279
+ if (createdPage) {
1280
+ pages = this.browser_context.pages?.() ?? [createdPage];
1281
+ }
1282
+ }
1283
+ this.tabPages = new Map();
1284
+ this.agent_current_page = null;
1285
+ this.human_current_page = null;
1286
+ this._syncTabsWithBrowserPages();
1287
+ if (!pages.length) {
1288
+ this.currentTabIndex = 0;
1289
+ this.currentUrl = normalize_url(preferredUrl ?? 'about:blank');
1290
+ this.currentTitle = this.currentUrl;
1291
+ if (!this._tabs.length) {
1292
+ this._tabs = [
1293
+ this._createTabInfo({
1294
+ page_id: this._tabCounter++,
1295
+ url: this.currentUrl,
1296
+ title: this.currentTitle,
1297
+ }),
1298
+ ];
1299
+ }
1300
+ this._syncSessionManagerFromTabs();
1301
+ return;
1302
+ }
1303
+ const normalizedPreferredUrl = typeof preferredUrl === 'string' && preferredUrl.trim().length > 0
1304
+ ? normalize_url(preferredUrl)
1305
+ : null;
1306
+ const pageByUrl = normalizedPreferredUrl == null
1307
+ ? null
1308
+ : (pages.find((page) => {
1309
+ try {
1310
+ return normalize_url(page.url()) === normalizedPreferredUrl;
1311
+ }
1312
+ catch {
1313
+ return false;
1314
+ }
1315
+ }) ?? null);
1316
+ const clampedIndex = preferredTabIndex >= 0 && preferredTabIndex < pages.length
1317
+ ? preferredTabIndex
1318
+ : 0;
1319
+ const nextPage = pageByUrl ?? pages[clampedIndex] ?? pages[0] ?? null;
1320
+ const nextTabIndex = nextPage
1321
+ ? this._tabs.findIndex((tab) => this.tabPages.get(tab.page_id) === nextPage)
1322
+ : -1;
1323
+ if (nextTabIndex >= 0) {
1324
+ this.currentTabIndex = nextTabIndex;
1325
+ }
1326
+ this._setActivePage(nextPage);
1327
+ this.human_current_page = nextPage;
1328
+ await this._syncCurrentTabFromPage(nextPage);
1329
+ }
1330
+ async reconnect(options = {}) {
1331
+ if (!this._usesRemoteBrowserConnection()) {
1332
+ throw new Error('Cannot reconnect without a remote browser connection');
1333
+ }
1334
+ const preferredUrl = typeof options.preferred_url === 'string'
1335
+ ? options.preferred_url
1336
+ : this.currentUrl;
1337
+ const preferredTabIndex = typeof options.preferred_tab_index === 'number'
1338
+ ? options.preferred_tab_index
1339
+ : this.currentTabIndex;
1340
+ this._detachRemoteDisconnectHandler();
1341
+ this.cachedBrowserState = null;
1342
+ this.currentPageLoadingStatus = null;
1343
+ this.browser = null;
1344
+ this.browser_context = null;
1345
+ this.agent_current_page = null;
1346
+ this.human_current_page = null;
1347
+ this._dialogHandlersAttached = new WeakSet();
1348
+ this.session_manager.clear();
1349
+ const playwright = this.playwright ?? (await async_playwright());
1350
+ this.playwright = playwright;
1351
+ this.browser = await this._connectToConfiguredBrowser(playwright);
1352
+ this.ownsBrowserResources = false;
1353
+ this.browser_context = await this._ensureBrowserContextFromBrowser(this.browser);
1354
+ await this._applyConfiguredExtraHttpHeaders();
1355
+ await this._restorePagesAfterReconnect(preferredUrl, preferredTabIndex);
1356
+ this._attachRemoteDisconnectHandler(this.browser);
1357
+ this.initialized = true;
1358
+ this._recordRecentEvent('browser_reconnected', {
1359
+ url: this.currentUrl,
1360
+ });
1361
+ }
1362
+ async _auto_reconnect(maxAttempts = 3) {
1363
+ if (this._reconnecting || !this._usesRemoteBrowserConnection()) {
1364
+ return;
1365
+ }
1366
+ this._reconnecting = true;
1367
+ this._beginReconnectWait();
1368
+ const startTime = Date.now();
1369
+ const preferredUrl = this.currentUrl;
1370
+ const preferredTabIndex = this.currentTabIndex;
1371
+ try {
1372
+ await this.event_bus.dispatch(new BrowserStoppedEvent({
1373
+ reason: 'connection_lost',
1374
+ }));
1375
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
1376
+ if (this._intentionalStop) {
1377
+ return;
1378
+ }
1379
+ await this.event_bus.dispatch(new BrowserReconnectingEvent({
1380
+ cdp_url: this.cdp_url ?? this.wss_url ?? 'remote',
1381
+ attempt,
1382
+ max_attempts: maxAttempts,
1383
+ }));
1384
+ try {
1385
+ await Promise.race([
1386
+ this.reconnect({
1387
+ preferred_url: preferredUrl,
1388
+ preferred_tab_index: preferredTabIndex,
1389
+ }),
1390
+ new Promise((_, reject) => {
1391
+ setTimeout(() => {
1392
+ reject(new Error(`Reconnect attempt timed out after ${Math.round(REMOTE_RECONNECT_ATTEMPT_TIMEOUT_MS / 1000)}s`));
1393
+ }, REMOTE_RECONNECT_ATTEMPT_TIMEOUT_MS);
1394
+ }),
1395
+ ]);
1396
+ if (this._intentionalStop) {
1397
+ return;
1398
+ }
1399
+ await this.event_bus.dispatch(new BrowserConnectedEvent({
1400
+ cdp_url: this.cdp_url ?? this.wss_url ?? 'remote',
1401
+ }));
1402
+ await this.event_bus.dispatch(new BrowserReconnectedEvent({
1403
+ cdp_url: this.cdp_url ?? this.wss_url ?? 'remote',
1404
+ attempt,
1405
+ downtime_seconds: (Date.now() - startTime) / 1000,
1406
+ }));
1407
+ return;
1408
+ }
1409
+ catch (error) {
1410
+ this.logger.warning(`Reconnect attempt ${attempt}/${maxAttempts} failed: ${error.message}`);
1411
+ if (attempt >= maxAttempts) {
1412
+ break;
1413
+ }
1414
+ const delayMs = REMOTE_RECONNECT_DELAYS_MS[attempt - 1] ??
1415
+ REMOTE_RECONNECT_DELAYS_MS[REMOTE_RECONNECT_DELAYS_MS.length - 1];
1416
+ await new Promise((resolve) => setTimeout(resolve, delayMs));
1417
+ }
1418
+ }
1419
+ await this.event_bus.dispatch(new BrowserErrorEvent({
1420
+ error_type: 'ReconnectionFailed',
1421
+ message: `Failed to reconnect after ${maxAttempts} attempts (${((Date.now() - startTime) / 1000).toFixed(1)}s)`,
1422
+ details: {
1423
+ cdp_url: this.cdp_url ?? this.wss_url ?? 'remote',
1424
+ max_attempts: maxAttempts,
1425
+ },
1426
+ }));
1427
+ }
1428
+ finally {
1429
+ this._reconnecting = false;
1430
+ this._endReconnectWait();
1431
+ }
1432
+ }
904
1433
  _isSandboxLaunchError(error) {
905
1434
  const message = error instanceof Error ? error.message : String(error);
906
1435
  return (/no usable sandbox/i.test(message) ||
@@ -957,6 +1486,7 @@ export class BrowserSession {
957
1486
  }
958
1487
  async start() {
959
1488
  this.attach_default_watchdogs();
1489
+ this._intentionalStop = false;
960
1490
  if (this.initialized) {
961
1491
  return this;
962
1492
  }
@@ -989,12 +1519,11 @@ export class BrowserSession {
989
1519
  const playwright = this.playwright ?? (await async_playwright());
990
1520
  this.playwright = playwright;
991
1521
  if (this.cdp_url) {
992
- this.browser = await playwright.chromium.connectOverCDP(this.cdp_url);
1522
+ this.browser = await this._connectToConfiguredBrowser(playwright);
993
1523
  this.ownsBrowserResources = false;
994
1524
  }
995
1525
  else if (this.wss_url) {
996
- const connectOptions = this._toPlaywrightOptions(this.browser_profile.kwargs_for_connect());
997
- this.browser = await playwright.chromium.connect(this.wss_url, connectOptions ?? {});
1526
+ this.browser = await this._connectToConfiguredBrowser(playwright);
998
1527
  this.ownsBrowserResources = false;
999
1528
  }
1000
1529
  else {
@@ -1016,14 +1545,11 @@ export class BrowserSession {
1016
1545
  if (existingContexts.length > 0) {
1017
1546
  this.browser_context = existingContexts[0] ?? null;
1018
1547
  }
1019
- else if (typeof this.browser?.newContext === 'function') {
1020
- const contextOptions = this._toPlaywrightOptions(this.browser_profile.kwargs_for_new_context());
1021
- this.browser_context = await this.browser.newContext(contextOptions ?? {});
1022
- }
1023
1548
  else {
1024
- this.browser_context = null;
1549
+ this.browser_context = await this._ensureBrowserContextFromBrowser(this.browser);
1025
1550
  }
1026
1551
  }
1552
+ await this._applyConfiguredExtraHttpHeaders();
1027
1553
  await ensurePage();
1028
1554
  if (!this.human_current_page ||
1029
1555
  this.human_current_page.isClosed?.()) {
@@ -1049,6 +1575,7 @@ export class BrowserSession {
1049
1575
  this.initialized = true;
1050
1576
  this._recordRecentEvent('browser_started', { url: this.currentUrl });
1051
1577
  this.logger.debug(`Started ${this.describe()} with profile ${this.browser_profile.toString()}`);
1578
+ this._attachRemoteDisconnectHandler(this.browser);
1052
1579
  await this.event_bus.dispatch(new BrowserConnectedEvent({
1053
1580
  cdp_url: this.cdp_url ?? this.wss_url ?? 'playwright',
1054
1581
  }));
@@ -1075,7 +1602,7 @@ export class BrowserSession {
1075
1602
  // Connect to browser via CDP
1076
1603
  try {
1077
1604
  const playwright = await import('playwright');
1078
- const browser = await playwright.chromium.connectOverCDP(cdpUrl);
1605
+ const browser = await this._connectToConfiguredBrowser(playwright);
1079
1606
  this.browser = browser;
1080
1607
  this.playwright = playwright;
1081
1608
  // Get or create context
@@ -1086,6 +1613,7 @@ export class BrowserSession {
1086
1613
  else {
1087
1614
  this.browser_context = (await browser.newContext());
1088
1615
  }
1616
+ await this._applyConfiguredExtraHttpHeaders();
1089
1617
  // Get or create page
1090
1618
  if (!this.browser_context) {
1091
1619
  throw new Error('Browser context not available');
@@ -1102,6 +1630,7 @@ export class BrowserSession {
1102
1630
  }
1103
1631
  // We don't own this browser since we're connecting to existing one
1104
1632
  this.ownsBrowserResources = false;
1633
+ this._attachRemoteDisconnectHandler(this.browser);
1105
1634
  this.initialized = true;
1106
1635
  this.logger.info(`Successfully connected to browser PID ${browserPid}`);
1107
1636
  }
@@ -1136,6 +1665,11 @@ export class BrowserSession {
1136
1665
  }
1137
1666
  async _shutdown_browser_session() {
1138
1667
  this.initialized = false;
1668
+ this._intentionalStop = true;
1669
+ this._reconnecting = false;
1670
+ this._endReconnectWait();
1671
+ this._reconnectTask = null;
1672
+ this._detachRemoteDisconnectHandler();
1139
1673
  this.attachedAgentId = null;
1140
1674
  this.attachedSharedAgentIds.clear();
1141
1675
  const closeWithTimeout = async (label, operation, timeoutMs = 3000) => {
@@ -1221,6 +1755,29 @@ export class BrowserSession {
1221
1755
  this.logger.debug(`Failed to build DOM tree: ${error.message}`);
1222
1756
  domState = createEmptyDomState();
1223
1757
  }
1758
+ const liveUrl = typeof page.url === 'function'
1759
+ ? normalize_url(page.url())
1760
+ : this.currentUrl;
1761
+ const shouldRetryEmptyDom = Object.keys(domState.selector_map).length === 0 &&
1762
+ !this._is_new_tab_page(liveUrl) &&
1763
+ !liveUrl.toLowerCase().endsWith('.pdf');
1764
+ if (shouldRetryEmptyDom) {
1765
+ this.logger.debug(`Empty DOM detected for ${liveUrl}; retrying once`);
1766
+ await this._waitWithAbort(EMPTY_DOM_RETRY_DELAY_MS, signal);
1767
+ try {
1768
+ const retryDomService = new DomService(page, this.logger);
1769
+ const retriedDomState = await this._withAbort(retryDomService.get_clickable_elements(this.browser_profile.highlight_elements, -1, this.browser_profile.viewport_expansion), signal);
1770
+ if (Object.keys(retriedDomState.selector_map).length > 0) {
1771
+ domState = retriedDomState;
1772
+ }
1773
+ }
1774
+ catch (error) {
1775
+ if (this._isAbortError(error)) {
1776
+ throw error;
1777
+ }
1778
+ this.logger.debug(`Retry after empty DOM failed: ${error.message}`);
1779
+ }
1780
+ }
1224
1781
  }
1225
1782
  let screenshot = null;
1226
1783
  if (options.include_screenshot && page?.screenshot) {
@@ -2624,11 +3181,12 @@ export class BrowserSession {
2624
3181
  }
2625
3182
  // ==================== Screenshots ====================
2626
3183
  /**
2627
- * Take a screenshot of the current page
3184
+ * Take a screenshot of the current page.
2628
3185
  * @param full_page Whether to capture the full scrollable page
3186
+ * @param clip Optional clip region for partial screenshots
2629
3187
  * @returns Base64 encoded PNG screenshot
2630
3188
  */
2631
- async take_screenshot(full_page = false) {
3189
+ async take_screenshot(full_page = false, clip = null) {
2632
3190
  const page = await this.get_current_page();
2633
3191
  if (!page) {
2634
3192
  throw new Error('No page available for screenshot');
@@ -2659,11 +3217,21 @@ export class BrowserSession {
2659
3217
  // Create CDP session for the screenshot
2660
3218
  cdp_session = await this.get_or_create_cdp_session(page);
2661
3219
  // Capture screenshot via CDP
2662
- const screenshot_response = await cdp_session.send('Page.captureScreenshot', {
3220
+ const screenshotParams = {
2663
3221
  captureBeyondViewport: full_page,
2664
3222
  fromSurface: true,
2665
3223
  format: 'png',
2666
- });
3224
+ };
3225
+ if (clip) {
3226
+ screenshotParams.clip = {
3227
+ x: clip.x,
3228
+ y: clip.y,
3229
+ width: clip.width,
3230
+ height: clip.height,
3231
+ scale: 1,
3232
+ };
3233
+ }
3234
+ const screenshot_response = await cdp_session.send('Page.captureScreenshot', screenshotParams);
2667
3235
  const screenshot_b64 = screenshot_response.data;
2668
3236
  if (!screenshot_b64) {
2669
3237
  throw new Error(`CDP returned empty screenshot data for page ${url}`);
@@ -2757,7 +3325,8 @@ export class BrowserSession {
2757
3325
  }
2758
3326
  }
2759
3327
  // Skip chrome:// pages and new tab pages
2760
- const isNewTab = currentUrl === 'about:blank' || currentUrl.startsWith('chrome://newtab');
3328
+ const isNewTab = currentUrl === 'about:blank' ||
3329
+ currentUrl.startsWith('chrome://newtab');
2761
3330
  if (isNewTab || currentUrl.startsWith('chrome://')) {
2762
3331
  if (isNewTab) {
2763
3332
  tabs_info.push({
@@ -3636,7 +4205,8 @@ export class BrowserSession {
3636
4205
  catch (error) {
3637
4206
  const message = error instanceof Error ? error.message : String(error);
3638
4207
  const isDownloadTimeout = error instanceof Error &&
3639
- (error.name === 'TimeoutError' || message.toLowerCase().includes('timeout'));
4208
+ (error.name === 'TimeoutError' ||
4209
+ message.toLowerCase().includes('timeout'));
3640
4210
  if (!isDownloadTimeout) {
3641
4211
  throw error;
3642
4212
  }
@@ -3654,7 +4224,9 @@ export class BrowserSession {
3654
4224
  const unique_filename = await BrowserSession.get_unique_filename(downloads_path, suggested_filename);
3655
4225
  const download_path = path.join(downloads_path, unique_filename);
3656
4226
  const download_guid = uuid7str();
3657
- const download_url = typeof download.url === 'function' ? download.url() : (this.currentUrl ?? '');
4227
+ const download_url = typeof download.url === 'function'
4228
+ ? download.url()
4229
+ : (this.currentUrl ?? '');
3658
4230
  await this.event_bus.dispatch(new DownloadStartedEvent({
3659
4231
  guid: download_guid,
3660
4232
  url: download_url,