browser-use 0.3.0 → 0.4.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -95,13 +95,13 @@ npx browser-use --mcp
95
95
 
96
96
  ```
97
97
  ┌─────────────────────────────────────────────────────┐
98
- │ Browser-Use
98
+ │ Browser-Use
99
99
  ├─────────────────────────────────────────────────────┤
100
- │ Agent ← MessageManager ← LLM Providers
101
- │ ↓
100
+ │ Agent ← MessageManager ← LLM Providers
101
+ │ ↓
102
102
  │ Controller → Action Registry → BrowserSession │
103
- │ ↓
104
- │ DomService
103
+ │ ↓
104
+ │ DomService
105
105
  └─────────────────────────────────────────────────────┘
106
106
  ```
107
107
 
@@ -396,23 +396,23 @@ const agent = new Agent({
396
396
 
397
397
  ```bash
398
398
  # Install dependencies
399
- npm install
399
+ pnpm install
400
400
 
401
401
  # Build
402
- npm run build
402
+ pnpm build
403
403
 
404
404
  # Run tests
405
- npm test
405
+ pnpm test
406
406
 
407
407
  # Lint & format
408
- npm run lint
409
- npm run prettier
408
+ pnpm lint
409
+ pnpm prettier
410
410
 
411
411
  # Type checking
412
- npm run typecheck
412
+ pnpm typecheck
413
413
 
414
414
  # Run an example
415
- npx tsx examples/simple-search.ts
415
+ pnpm exec tsx examples/simple-search.ts
416
416
  ```
417
417
 
418
418
  ## Requirements
@@ -7,7 +7,7 @@ export declare class Page {
7
7
  private _mouse;
8
8
  constructor(browser_session: BrowserSession);
9
9
  get mouse(): Mouse;
10
- _currentPage(): Promise<import("playwright-core").Page>;
10
+ _currentPage(): Promise<import("playwright").Page>;
11
11
  get_url(): Promise<string>;
12
12
  get_title(): Promise<string>;
13
13
  goto(url: string, options?: {
@@ -332,6 +332,10 @@ export declare class Agent<Context = ControllerContext, AgentStructuredOutput =
332
332
  private _replace_shortened_urls_in_value;
333
333
  private _parseCompletionPayload;
334
334
  private _isModelActionMissing;
335
+ private _getOutputActionNames;
336
+ private _toStrictActionParamSchema;
337
+ private _buildActionOutputSchema;
338
+ private _buildLlmOutputFormat;
335
339
  private _get_model_output_with_retry;
336
340
  private _try_switch_to_fallback_llm;
337
341
  private _log_fallback_switch;
@@ -3633,13 +3633,83 @@ export class Agent {
3633
3633
  return Object.keys(candidate).length === 0;
3634
3634
  });
3635
3635
  }
3636
+ _getOutputActionNames(doneOnly) {
3637
+ const registryActions = this.controller.registry.get_all_actions();
3638
+ const modelForStep = doneOnly
3639
+ ? this.DoneActionModel
3640
+ : this.ActionModel;
3641
+ const modelAvailableNames = modelForStep?.available_actions;
3642
+ if (Array.isArray(modelAvailableNames) && modelAvailableNames.length > 0) {
3643
+ const deduped = Array.from(new Set(modelAvailableNames.filter((name) => typeof name === 'string' &&
3644
+ name.trim().length > 0 &&
3645
+ registryActions.has(name))));
3646
+ if (deduped.length > 0) {
3647
+ return deduped;
3648
+ }
3649
+ }
3650
+ if (doneOnly && registryActions.has('done')) {
3651
+ return ['done'];
3652
+ }
3653
+ return Array.from(registryActions.keys());
3654
+ }
3655
+ _toStrictActionParamSchema(schema) {
3656
+ if (schema instanceof z.ZodObject) {
3657
+ return schema.strict();
3658
+ }
3659
+ return schema;
3660
+ }
3661
+ _buildActionOutputSchema(doneOnly) {
3662
+ const registryActions = this.controller.registry.get_all_actions();
3663
+ const actionSchemas = this._getOutputActionNames(doneOnly)
3664
+ .map((actionName) => {
3665
+ const actionInfo = registryActions.get(actionName);
3666
+ if (!actionInfo) {
3667
+ return null;
3668
+ }
3669
+ const paramSchema = this._toStrictActionParamSchema(actionInfo.paramSchema);
3670
+ return z.object({ [actionName]: paramSchema }).strict();
3671
+ })
3672
+ .filter((schema) => schema != null);
3673
+ if (actionSchemas.length === 0) {
3674
+ const doneAction = registryActions.get('done');
3675
+ if (doneAction) {
3676
+ const doneParams = this._toStrictActionParamSchema(doneAction.paramSchema);
3677
+ return z.object({ done: doneParams }).strict();
3678
+ }
3679
+ return z.object({ done: z.object({}).strict() }).strict();
3680
+ }
3681
+ if (actionSchemas.length === 1) {
3682
+ return actionSchemas[0];
3683
+ }
3684
+ const [firstActionSchema, secondActionSchema, ...remainingActionSchemas] = actionSchemas;
3685
+ return z.union([
3686
+ firstActionSchema,
3687
+ secondActionSchema,
3688
+ ...remainingActionSchemas,
3689
+ ]);
3690
+ }
3691
+ _buildLlmOutputFormat(doneOnly) {
3692
+ const schema = z.object({
3693
+ thinking: z.string().optional().nullable(),
3694
+ evaluation_previous_goal: z.string().optional().nullable(),
3695
+ memory: z.string().optional().nullable(),
3696
+ next_goal: z.string().optional().nullable(),
3697
+ current_plan_item: z.number().int().optional().nullable(),
3698
+ plan_update: z.array(z.string()).optional().nullable(),
3699
+ action: z
3700
+ .array(this._buildActionOutputSchema(doneOnly))
3701
+ .optional()
3702
+ .nullable(),
3703
+ });
3704
+ const outputFormat = schema;
3705
+ outputFormat.schema = schema;
3706
+ return outputFormat;
3707
+ }
3636
3708
  async _get_model_output_with_retry(messages, signal = null) {
3637
3709
  const urlReplacements = this._process_messages_and_replace_long_urls_shorter_ones(messages);
3638
3710
  const invokeAndParse = async (inputMessages) => {
3639
3711
  this._throwIfAborted(signal);
3640
- const outputFormat = this._enforceDoneOnlyForCurrentStep
3641
- ? DoneOnlyLLMOutputFormat
3642
- : AgentLLMOutputFormat;
3712
+ const outputFormat = this._buildLlmOutputFormat(this._enforceDoneOnlyForCurrentStep);
3643
3713
  const completion = await this.llm.ainvoke(inputMessages, outputFormat, {
3644
3714
  signal: signal ?? undefined,
3645
3715
  session_id: this.session_id,
@@ -173,9 +173,9 @@ export declare class BrowserProfile {
173
173
  get traces_dir(): Nullable<string>;
174
174
  get user_data_dir(): Nullable<string>;
175
175
  get viewport_expansion(): number;
176
- get viewport(): Nullable<import("playwright-core").ViewportSize>;
176
+ get viewport(): Nullable<import("playwright").ViewportSize>;
177
177
  get wait_for_network_idle_page_load_time(): number;
178
- get window_size(): Nullable<import("playwright-core").ViewportSize>;
178
+ get window_size(): Nullable<import("playwright").ViewportSize>;
179
179
  private applyLegacyWindowSize;
180
180
  private warnStorageStateUserDataDirConflict;
181
181
  private warnUserDataDirNonDefault;
@@ -145,13 +145,13 @@ export declare class BrowserSession {
145
145
  private _shutdown_browser_session;
146
146
  close(): Promise<void>;
147
147
  get_browser_state_with_recovery(options?: BrowserStateOptions): Promise<BrowserStateSummary>;
148
- get_current_page(): Promise<import("playwright-core").Page | null>;
148
+ get_current_page(): Promise<import("playwright").Page | null>;
149
149
  update_current_page(page: Page | null, title?: string | null, url?: string | null): void;
150
150
  private _buildTabs;
151
- navigate_to(url: string, options?: BrowserNavigationOptions): Promise<import("playwright-core").Page | null>;
152
- create_new_tab(url: string, options?: BrowserNavigationOptions): Promise<import("playwright-core").Page | null>;
151
+ navigate_to(url: string, options?: BrowserNavigationOptions): Promise<import("playwright").Page | null>;
152
+ create_new_tab(url: string, options?: BrowserNavigationOptions): Promise<import("playwright").Page | null>;
153
153
  private _resolveTabIndex;
154
- switch_to_tab(identifier: number | string, options?: BrowserActionOptions): Promise<import("playwright-core").Page | null>;
154
+ switch_to_tab(identifier: number | string, options?: BrowserActionOptions): Promise<import("playwright").Page | null>;
155
155
  close_tab(identifier: number | string): Promise<void>;
156
156
  wait(seconds: number, options?: BrowserActionOptions): Promise<void>;
157
157
  send_keys(keys: string, options?: BrowserActionOptions): Promise<void>;
@@ -530,6 +530,10 @@ export declare class BrowserSession {
530
530
  * Updates human_current_page to reflect which tab the user is viewing
531
531
  */
532
532
  private _onTabVisibilityChange;
533
+ /**
534
+ * Normalize pid values before issuing process operations.
535
+ */
536
+ private _normalizePid;
533
537
  /**
534
538
  * Kill all child processes spawned by this browser session
535
539
  */
@@ -2,7 +2,7 @@ import fs from 'node:fs';
2
2
  import os from 'node:os';
3
3
  import path from 'node:path';
4
4
  import { isIP } from 'node:net';
5
- import { exec } from 'node:child_process';
5
+ import { execFile } from 'node:child_process';
6
6
  import { promisify } from 'node:util';
7
7
  import { createLogger } from '../logging-config.js';
8
8
  import { match_url_with_domain_pattern, uuid7str } from '../utils.js';
@@ -30,7 +30,7 @@ import { RecordingWatchdog } from './watchdogs/recording-watchdog.js';
30
30
  import { ScreenshotWatchdog } from './watchdogs/screenshot-watchdog.js';
31
31
  import { SecurityWatchdog } from './watchdogs/security-watchdog.js';
32
32
  import { StorageStateWatchdog } from './watchdogs/storage-state-watchdog.js';
33
- const execAsync = promisify(exec);
33
+ const execFileAsync = promisify(execFile);
34
34
  const createEmptyDomState = () => {
35
35
  const root = new DOMElementNode(true, null, 'html', '/html[1]', {}, []);
36
36
  return new DOMState(root, {});
@@ -1760,13 +1760,12 @@ export class BrowserSession {
1760
1760
  text: opt.textContent?.trim() ?? '',
1761
1761
  value: (opt.value ?? '').trim(),
1762
1762
  }));
1763
- const normalize = (value) => value.trim().toLowerCase();
1764
1763
  const targetRaw = optionText.trim();
1765
- const targetLower = normalize(optionText);
1764
+ const targetLower = optionText.trim().toLowerCase();
1766
1765
  let matchedIndex = options.findIndex((opt) => opt.text === targetRaw || opt.value === targetRaw);
1767
1766
  if (matchedIndex < 0) {
1768
- matchedIndex = options.findIndex((opt) => normalize(opt.text) === targetLower ||
1769
- normalize(opt.value) === targetLower);
1767
+ matchedIndex = options.findIndex((opt) => opt.text.trim().toLowerCase() === targetLower ||
1768
+ opt.value.trim().toLowerCase() === targetLower);
1770
1769
  }
1771
1770
  if (matchedIndex < 0) {
1772
1771
  return { found: true, success: false, options };
@@ -1780,8 +1779,16 @@ export class BrowserSession {
1780
1779
  : null;
1781
1780
  const selectedText = selectedOption?.textContent?.trim() ?? '';
1782
1781
  const selectedValue = (root.value ?? '').trim();
1783
- const verified = normalize(selectedValue) === normalize(matched.value) ||
1784
- normalize(selectedText) === normalize(matched.text);
1782
+ const selectedValueLower = selectedValue.trim().toLowerCase();
1783
+ const selectedTextLower = selectedText.trim().toLowerCase();
1784
+ const matchedValueLower = String(matched.value ?? '')
1785
+ .trim()
1786
+ .toLowerCase();
1787
+ const matchedTextLower = String(matched.text ?? '')
1788
+ .trim()
1789
+ .toLowerCase();
1790
+ const verified = selectedValueLower === matchedValueLower ||
1791
+ selectedTextLower === matchedTextLower;
1785
1792
  return {
1786
1793
  found: true,
1787
1794
  success: verified,
@@ -1819,13 +1826,12 @@ export class BrowserSession {
1819
1826
  text: node.textContent?.trim() ?? '',
1820
1827
  value: node.textContent?.trim() ?? '',
1821
1828
  }));
1822
- const normalize = (value) => value.trim().toLowerCase();
1823
1829
  const targetRaw = optionText.trim();
1824
- const targetLower = normalize(optionText);
1830
+ const targetLower = optionText.trim().toLowerCase();
1825
1831
  let matchedIndex = options.findIndex((opt) => opt.text === targetRaw || opt.value === targetRaw);
1826
1832
  if (matchedIndex < 0) {
1827
- matchedIndex = options.findIndex((opt) => normalize(opt.text) === targetLower ||
1828
- normalize(opt.value) === targetLower);
1833
+ matchedIndex = options.findIndex((opt) => opt.text.trim().toLowerCase() === targetLower ||
1834
+ opt.value.trim().toLowerCase() === targetLower);
1829
1835
  }
1830
1836
  if (matchedIndex < 0) {
1831
1837
  return { found: true, success: false, options };
@@ -4272,6 +4278,16 @@ export class BrowserSession {
4272
4278
  }
4273
4279
  // endregion
4274
4280
  // region - Process Management
4281
+ /**
4282
+ * Normalize pid values before issuing process operations.
4283
+ */
4284
+ _normalizePid(pid) {
4285
+ if (!Number.isSafeInteger(pid) || pid <= 0) {
4286
+ this.logger.debug(`Skipping process operation for invalid pid: ${String(pid)}`);
4287
+ return null;
4288
+ }
4289
+ return pid;
4290
+ }
4275
4291
  /**
4276
4292
  * Kill all child processes spawned by this browser session
4277
4293
  */
@@ -4280,17 +4296,17 @@ export class BrowserSession {
4280
4296
  return;
4281
4297
  }
4282
4298
  this.logger.debug(`Killing ${this._childProcesses.size} child processes`);
4283
- for (const pid of this._childProcesses) {
4299
+ for (const trackedPid of this._childProcesses) {
4300
+ const pid = this._normalizePid(trackedPid);
4301
+ if (!pid) {
4302
+ continue;
4303
+ }
4284
4304
  try {
4285
- // Try to kill the process
4286
4305
  process.kill(pid, 'SIGTERM');
4287
4306
  this.logger.debug(`Sent SIGTERM to process ${pid}`);
4288
- // Wait briefly and check if still alive
4289
4307
  await new Promise((resolve) => setTimeout(resolve, 500));
4290
4308
  try {
4291
- // Check if process still exists
4292
4309
  process.kill(pid, 0);
4293
- // If we get here, process is still alive, force kill
4294
4310
  process.kill(pid, 'SIGKILL');
4295
4311
  this.logger.debug(`Sent SIGKILL to process ${pid}`);
4296
4312
  }
@@ -4299,7 +4315,6 @@ export class BrowserSession {
4299
4315
  }
4300
4316
  }
4301
4317
  catch (error) {
4302
- // Process doesn't exist or we don't have permission
4303
4318
  this.logger.debug(`Could not kill process ${pid}: ${error.message}`);
4304
4319
  }
4305
4320
  }
@@ -4309,39 +4324,39 @@ export class BrowserSession {
4309
4324
  * Terminate the browser process and all its children
4310
4325
  */
4311
4326
  async _terminateBrowserProcess() {
4312
- if (!this.browser_pid) {
4327
+ const browserPid = this._normalizePid(this.browser_pid);
4328
+ if (!browserPid) {
4313
4329
  return;
4314
4330
  }
4315
4331
  try {
4316
- this.logger.debug(`Terminating browser process ${this.browser_pid}`);
4317
- // Platform-specific process tree termination
4332
+ this.logger.debug(`Terminating browser process ${browserPid}`);
4318
4333
  if (process.platform === 'win32') {
4319
- // Windows: use taskkill to kill process tree
4320
- await execAsync(`taskkill /PID ${this.browser_pid} /T /F`).catch(() => {
4334
+ await execFileAsync('taskkill', [
4335
+ '/PID',
4336
+ String(browserPid),
4337
+ '/T',
4338
+ '/F',
4339
+ ]).catch(() => {
4321
4340
  // Ignore errors if process already dead
4322
4341
  });
4323
4342
  }
4324
4343
  else {
4325
- // Unix-like: kill process group
4326
4344
  try {
4327
- // Try to kill the process group
4328
- process.kill(-this.browser_pid, 'SIGTERM');
4345
+ process.kill(-browserPid, 'SIGTERM');
4329
4346
  await new Promise((resolve) => setTimeout(resolve, 1000));
4330
- // Check if still alive and force kill if needed
4331
4347
  try {
4332
- process.kill(-this.browser_pid, 0);
4333
- process.kill(-this.browser_pid, 'SIGKILL');
4348
+ process.kill(-browserPid, 0);
4349
+ process.kill(-browserPid, 'SIGKILL');
4334
4350
  }
4335
4351
  catch {
4336
4352
  // Process is dead
4337
4353
  }
4338
4354
  }
4339
4355
  catch {
4340
- // Fallback to killing just the process
4341
4356
  try {
4342
- process.kill(this.browser_pid, 'SIGTERM');
4357
+ process.kill(browserPid, 'SIGTERM');
4343
4358
  await new Promise((resolve) => setTimeout(resolve, 1000));
4344
- process.kill(this.browser_pid, 'SIGKILL');
4359
+ process.kill(browserPid, 'SIGKILL');
4345
4360
  }
4346
4361
  catch {
4347
4362
  // Process doesn't exist
@@ -4358,26 +4373,37 @@ export class BrowserSession {
4358
4373
  * Cross-platform implementation using ps on Unix-like systems and WMIC on Windows
4359
4374
  */
4360
4375
  async _getChildProcesses(pid) {
4376
+ const normalizedPid = this._normalizePid(pid);
4377
+ if (!normalizedPid) {
4378
+ return [];
4379
+ }
4361
4380
  try {
4362
4381
  if (process.platform === 'win32') {
4363
- // Windows: use WMIC
4364
- const { stdout } = await execAsync(`wmic process where (ParentProcessId=${pid}) get ProcessId`);
4365
- const pids = stdout
4366
- .split('\n')
4367
- .slice(1) // Skip header
4368
- .map((line) => parseInt(line.trim(), 10))
4369
- .filter((p) => !isNaN(p));
4370
- return pids;
4371
- }
4372
- else {
4373
- // Unix-like: use ps
4374
- const { stdout } = await execAsync(`ps -o pid= --ppid ${pid}`);
4382
+ const { stdout } = await execFileAsync('wmic', [
4383
+ 'process',
4384
+ 'where',
4385
+ `ParentProcessId=${normalizedPid}`,
4386
+ 'get',
4387
+ 'ProcessId',
4388
+ ]);
4375
4389
  const pids = stdout
4376
4390
  .split('\n')
4391
+ .slice(1)
4377
4392
  .map((line) => parseInt(line.trim(), 10))
4378
- .filter((p) => !isNaN(p));
4393
+ .filter((p) => Number.isFinite(p));
4379
4394
  return pids;
4380
4395
  }
4396
+ const { stdout } = await execFileAsync('ps', [
4397
+ '-o',
4398
+ 'pid=',
4399
+ '--ppid',
4400
+ String(normalizedPid),
4401
+ ]);
4402
+ const pids = stdout
4403
+ .split('\n')
4404
+ .map((line) => parseInt(line.trim(), 10))
4405
+ .filter((p) => Number.isFinite(p));
4406
+ return pids;
4381
4407
  }
4382
4408
  catch {
4383
4409
  return [];
@@ -4387,13 +4413,19 @@ export class BrowserSession {
4387
4413
  * Track a child process
4388
4414
  */
4389
4415
  _trackChildProcess(pid) {
4390
- this._childProcesses.add(pid);
4416
+ const normalizedPid = this._normalizePid(pid);
4417
+ if (normalizedPid) {
4418
+ this._childProcesses.add(normalizedPid);
4419
+ }
4391
4420
  }
4392
4421
  /**
4393
4422
  * Untrack a child process
4394
4423
  */
4395
4424
  _untrackChildProcess(pid) {
4396
- this._childProcesses.delete(pid);
4425
+ const normalizedPid = this._normalizePid(pid);
4426
+ if (normalizedPid) {
4427
+ this._childProcesses.delete(normalizedPid);
4428
+ }
4397
4429
  }
4398
4430
  // region: Loading Animations
4399
4431
  /**