browser-use 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -23,7 +23,7 @@
23
23
  ## ✨ Features
24
24
 
25
25
  - 🤖 **Autonomous Browser Control** — AI-driven navigation, clicking, typing, form filling, scrolling, and tab management
26
- - 🧠 **10+ LLM Providers** — OpenAI, Anthropic, Google Gemini, Azure, AWS Bedrock, Groq, Ollama, DeepSeek, OpenRouter, Mistral, Cerebras, and custom providers
26
+ - 🧠 **15+ LLM Providers & Adapters** — OpenAI, Anthropic, Google Gemini, Azure, AWS Bedrock, Groq, Ollama, DeepSeek, OpenRouter, Mistral, Cerebras, Browser Use, LiteLLM, OCI Raw, Vercel, and custom providers
27
27
  - 👁️ **Vision Support** — Screenshot-based understanding for visual web interactions
28
28
  - 🔧 **45+ Built-in Actions** — Navigation, element interaction, scrolling, forms, tabs, content extraction, file I/O, and more
29
29
  - 🧩 **Custom Actions** — Extensible registry with Zod schema validation, domain restrictions, and page filters
@@ -112,7 +112,7 @@ npx browser-use --mcp
112
112
  | **BrowserSession** | Playwright wrapper — browser lifecycle, tab management, screenshots |
113
113
  | **DomService** | Extracts interactive elements with indexed mapping for LLM consumption |
114
114
  | **MessageManager** | Manages LLM conversation history with token optimization |
115
- | **LLM Providers** | Unified `BaseChatModel` interface across 10+ providers |
115
+ | **LLM Providers** | Unified `BaseChatModel` interface across 15+ providers and adapters |
116
116
 
117
117
  ### How It Works
118
118
 
@@ -125,19 +125,23 @@ npx browser-use --mcp
125
125
 
126
126
  ## 🔌 LLM Providers
127
127
 
128
- | Provider | Import | Vision | Notes |
129
- | ----------------- | ---------------------------- | ------ | --------------------------------------------- |
130
- | **OpenAI** | `browser-use/llm/openai` | ✅ | Default provider, reasoning models (o1/o3/o4) |
131
- | **Anthropic** | `browser-use/llm/anthropic` | ✅ | Prompt caching support |
132
- | **Google Gemini** | `browser-use/llm/google` | ✅ | Extended thinking support |
133
- | **Azure OpenAI** | `browser-use/llm/azure` | ✅ | Enterprise deployment |
134
- | **AWS Bedrock** | `browser-use/llm/aws` | ✅ | Claude via AWS |
135
- | **Groq** | `browser-use/llm/groq` | ❌ | Fastest inference |
136
- | **Ollama** | `browser-use/llm/ollama` | ❌ | Local/self-hosted models |
137
- | **DeepSeek** | `browser-use/llm/deepseek` | ❌ | Cost-effective |
138
- | **OpenRouter** | `browser-use/llm/openrouter` | Varies | Multi-model routing |
139
- | **Mistral** | `browser-use/llm/mistral` | Varies | Mistral models |
140
- | **Cerebras** | `browser-use/llm/cerebras` | ❌ | Fast inference |
128
+ | Provider | Import | Vision | Notes |
129
+ | ----------------- | ----------------------------- | ------ | --------------------------------------------- |
130
+ | **OpenAI** | `browser-use/llm/openai` | ✅ | Default provider, reasoning models (o1/o3/o4) |
131
+ | **Anthropic** | `browser-use/llm/anthropic` | ✅ | Prompt caching support |
132
+ | **Google Gemini** | `browser-use/llm/google` | ✅ | Extended thinking support |
133
+ | **Azure OpenAI** | `browser-use/llm/azure` | ✅ | Enterprise deployment |
134
+ | **AWS Bedrock** | `browser-use/llm/aws` | ✅ | Claude via AWS |
135
+ | **Groq** | `browser-use/llm/groq` | ❌ | Fastest inference |
136
+ | **Ollama** | `browser-use/llm/ollama` | ❌ | Local/self-hosted models |
137
+ | **DeepSeek** | `browser-use/llm/deepseek` | ❌ | Cost-effective |
138
+ | **OpenRouter** | `browser-use/llm/openrouter` | Varies | Multi-model routing |
139
+ | **Mistral** | `browser-use/llm/mistral` | Varies | Mistral models |
140
+ | **Cerebras** | `browser-use/llm/cerebras` | ❌ | Fast inference |
141
+ | **Browser Use** | `browser-use/llm/browser-use` | Varies | Hosted Browser Use LLM |
142
+ | **LiteLLM** | `browser-use/llm/litellm` | Varies | OpenAI-compatible LiteLLM gateway |
143
+ | **OCI Raw** | `browser-use/llm/oci-raw` | Varies | Oracle Cloud Generative AI |
144
+ | **Vercel** | `browser-use/llm/vercel` | Varies | Vercel AI Gateway / routed models |
141
145
 
142
146
  <details>
143
147
  <summary>Provider examples</summary>
@@ -210,6 +214,7 @@ const agent = new Agent({
210
214
  ### Custom Actions
211
215
 
212
216
  ```typescript
217
+ import fs from 'node:fs';
213
218
  import { Controller, ActionResult } from 'browser-use';
214
219
  import { z } from 'zod';
215
220
 
@@ -345,7 +350,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_
345
350
  }
346
351
  ```
347
352
 
348
- Available MCP tools: `browser_run_task`, `browser_navigate`, `browser_click`, `browser_type`, `browser_scroll`, `browser_get_state`, `browser_extract`, `browser_screenshot`, `browser_close`.
353
+ Core MCP tools include `retry_with_browser_use_agent`, `browser_navigate`, `browser_click`, `browser_type`, `browser_get_state`, `browser_extract_content`, `browser_scroll`, `browser_go_back`, `browser_list_tabs`, `browser_switch_tab`, `browser_close_tab`, `browser_list_sessions`, `browser_close_session`, and `browser_close_all`. The server also exposes registered controller actions as additional MCP tools.
349
354
 
350
355
  > See [MCP Server Guide](./docs/MCP_SERVER.md) for more details.
351
356
 
@@ -423,4 +428,4 @@ pnpm exec tsx examples/simple-search.ts
423
428
 
424
429
  ## 📄 License
425
430
 
426
- [MIT](./LICENSE) © Web LLM
431
+ [MIT](./LICENSE)
@@ -34,7 +34,7 @@ import { AgentTelemetryEvent } from '../telemetry/views.js';
34
34
  import { TokenCost } from '../tokens/service.js';
35
35
  import { construct_judge_messages, construct_simple_judge_messages, } from './judge.js';
36
36
  import { CloudSkillService, MissingCookieException, build_skill_parameters_schema, get_skill_slug, } from '../skills/index.js';
37
- loadEnv();
37
+ loadEnv({ quiet: true });
38
38
  const logger = createLogger('browser_use.agent');
39
39
  const URL_PATTERN = /https?:\/\/[^\s<>"']+|www\.[^\s<>"']+|[^\s<>"']+\.[a-z]{2,}(?:\/[^\s<>"']*)?/gi;
40
40
  export const log_response = (response, registry, logInstance = logger) => {
@@ -3886,7 +3886,16 @@ export class Agent {
3886
3886
  {});
3887
3887
  const paramsResult = actionInfo.paramSchema.safeParse(rawParams);
3888
3888
  if (!paramsResult.success) {
3889
- throw new Error(`Invalid parameters for action '${requestedActionName}': ${paramsResult.error.message}`);
3889
+ // Surface a human-readable issue list (zod v4 `prettifyError`) plus
3890
+ // a corrective hint, rather than the default JSON dump of `.issues`.
3891
+ // This Error propagates → `_handle_step_error` writes it into
3892
+ // `state.last_result` → `create_state_messages` injects it into the
3893
+ // next LLM turn, so the model knows exactly what shape it got wrong.
3894
+ const pretty = z.prettifyError(paramsResult.error);
3895
+ const sentParams = JSON.stringify(rawParams);
3896
+ throw new Error(`Schema validation failed for action '${requestedActionName}'. ` +
3897
+ `You sent: ${sentParams}. Issues:\n${pretty}\n` +
3898
+ `Please retry with parameters matching the action's schema exactly.`);
3890
3899
  }
3891
3900
  normalizedActions.push(new modelForStep({
3892
3901
  [actionName]: paramsResult.data,
package/dist/cli.js CHANGED
@@ -33,7 +33,7 @@ import { setupLogging } from './logging-config.js';
33
33
  import { get_tunnel_manager } from './skill-cli/tunnel.js';
34
34
  import { DeviceAuthClient, save_cloud_api_token } from './sync/auth.js';
35
35
  import dotenv from 'dotenv';
36
- dotenv.config();
36
+ dotenv.config({ quiet: true });
37
37
  const require = createRequire(import.meta.url);
38
38
  const CLI_PROVIDER_ALIASES = {
39
39
  openai: 'openai',
package/dist/config.js CHANGED
@@ -4,7 +4,7 @@ import path from 'node:path';
4
4
  import { randomUUID } from 'node:crypto';
5
5
  import { config as loadEnv } from 'dotenv';
6
6
  import { createLogger } from './logging-config.js';
7
- loadEnv();
7
+ loadEnv({ quiet: true });
8
8
  const logger = createLogger('browser_use.config');
9
9
  const expand_user = (value) => value.replace(/^~(?=$|\/|\\)/, os.homedir());
10
10
  const resolve_path = (value) => path.resolve(expand_user(value));
@@ -4,6 +4,7 @@ export type ActionHandler = (...args: any[]) => Promise<unknown> | unknown;
4
4
  type BrowserSession = unknown;
5
5
  type BaseChatModel = unknown;
6
6
  type FileSystem = unknown;
7
+ export declare function renderParamsJsonSchema(schema: ZodTypeAny, skipKeys: Set<string>): Record<string, unknown>;
7
8
  export declare class RegisteredAction {
8
9
  readonly name: string;
9
10
  readonly description: string;
@@ -13,6 +14,7 @@ export declare class RegisteredAction {
13
14
  readonly pageFilter: ((page: Page) => boolean) | null;
14
15
  readonly terminates_sequence: boolean;
15
16
  constructor(name: string, description: string, handler: ActionHandler, paramSchema: ZodTypeAny, domains?: string[] | null, pageFilter?: ((page: Page) => boolean) | null, terminates_sequence?: boolean);
17
+ getPromptJsonSchema(): Record<string, unknown>;
16
18
  promptDescription(): string;
17
19
  }
18
20
  export declare class ActionModel {
@@ -15,6 +15,38 @@ const getPageUrl = (page) => {
15
15
  }
16
16
  return candidate ?? '';
17
17
  };
18
+ // Render an action's param schema as compact JSON Schema for the LLM prompt.
19
+ // Replaces a prior raw dump of zod's private `_def` AST, which leaked
20
+ // internal keys like `innerType`/`defaultValue` and confused the LLM into
21
+ // copying default booleans into numeric fields (see scroll.num_pages bug).
22
+ export function renderParamsJsonSchema(schema, skipKeys) {
23
+ // `io: 'input'` makes zod render the *input* shape (what the LLM is
24
+ // expected to provide). Without it, fields with `.default(...)` get marked
25
+ // as required in the JSON Schema (because the parsed *output* always has
26
+ // them), which misleads the model — e.g. scroll.num_pages, done.success.
27
+ const raw = z.toJSONSchema(schema, {
28
+ io: 'input',
29
+ unrepresentable: 'any',
30
+ });
31
+ // Strip dialect noise the LLM doesn't need.
32
+ delete raw.$schema;
33
+ const properties = raw.properties ?? {};
34
+ const filteredProps = {};
35
+ for (const [key, value] of Object.entries(properties)) {
36
+ if (skipKeys.has(key)) {
37
+ continue;
38
+ }
39
+ filteredProps[key] = value;
40
+ }
41
+ raw.properties = filteredProps;
42
+ if (Array.isArray(raw.required)) {
43
+ raw.required = raw.required.filter((key) => typeof key === 'string' && !skipKeys.has(key));
44
+ if (raw.required.length === 0) {
45
+ delete raw.required;
46
+ }
47
+ }
48
+ return raw;
49
+ }
18
50
  export class RegisteredAction {
19
51
  name;
20
52
  description;
@@ -32,10 +64,12 @@ export class RegisteredAction {
32
64
  this.pageFilter = pageFilter;
33
65
  this.terminates_sequence = terminates_sequence;
34
66
  }
35
- promptDescription() {
67
+ // Returns the JSON Schema rendered for the LLM prompt, with the same
68
+ // skipKeys logic applied as in `promptDescription`. Exposed so tooling
69
+ // (e.g. scripts/dump-schema.ts) can exercise the exact code path the
70
+ // model sees.
71
+ getPromptJsonSchema() {
36
72
  const skipKeys = new Set(['title']);
37
- let description = `${this.description}: \n`;
38
- description += `{${this.name}: `;
39
73
  const schemaShape = (this.paramSchema instanceof z.ZodObject && this.paramSchema.shape) ||
40
74
  ('shape' in this.paramSchema ? this.paramSchema.shape : null);
41
75
  const hideStructuredDoneSuccess = Boolean(this.name === 'done' &&
@@ -46,26 +80,19 @@ export class RegisteredAction {
46
80
  if (hideStructuredDoneSuccess) {
47
81
  skipKeys.add('success');
48
82
  }
49
- const hideExtractOutputSchema = Boolean(this.name === 'extract_structured_data' &&
83
+ const hideExtractOutputSchema = Boolean((this.name === 'extract_structured_data' || this.name === 'extract') &&
50
84
  schemaShape &&
51
85
  typeof schemaShape === 'object' &&
52
86
  Object.prototype.hasOwnProperty.call(schemaShape, 'output_schema'));
53
87
  if (hideExtractOutputSchema) {
54
88
  skipKeys.add('output_schema');
55
89
  }
56
- if (schemaShape) {
57
- const props = Object.fromEntries(Object.entries(schemaShape)
58
- .filter(([key]) => !skipKeys.has(key))
59
- .map(([key, value]) => {
60
- const entries = value instanceof z.ZodType ? value._def : value;
61
- const cleanEntries = Object.fromEntries(Object.entries(entries).filter(([propKey]) => !skipKeys.has(propKey)));
62
- return [key, cleanEntries];
63
- }));
64
- description += JSON.stringify(props);
65
- }
66
- else {
67
- description += '{}';
68
- }
90
+ return renderParamsJsonSchema(this.paramSchema, skipKeys);
91
+ }
92
+ promptDescription() {
93
+ let description = `${this.description}: \n`;
94
+ description += `{${this.name}: `;
95
+ description += JSON.stringify(this.getPromptJsonSchema());
69
96
  description += '}';
70
97
  return description;
71
98
  }
@@ -1,7 +1,7 @@
1
1
  import { createRequire } from 'node:module';
2
2
  import { config as loadEnv } from 'dotenv';
3
3
  import { createLogger } from './logging-config.js';
4
- loadEnv();
4
+ loadEnv({ quiet: true });
5
5
  const require = createRequire(import.meta.url);
6
6
  const logger = createLogger('browser_use.observability');
7
7
  let lmnrObserve = null;
package/dist/utils.js CHANGED
@@ -10,7 +10,7 @@ import { fileURLToPath } from 'node:url';
10
10
  import { config as loadEnv } from 'dotenv';
11
11
  import * as minimatchModule from 'minimatch';
12
12
  import { createLogger } from './logging-config.js';
13
- loadEnv();
13
+ loadEnv({ quiet: true });
14
14
  const logger = createLogger('browser_use.utils');
15
15
  let _exiting = false;
16
16
  const minimatch = (minimatchModule.minimatch ??
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "browser-use",
3
- "version": "0.6.0",
3
+ "version": "0.6.1",
4
4
  "description": "A TypeScript-first library for programmatic browser control, designed for building AI-powered web agents.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -264,6 +264,11 @@
264
264
  "engines": {
265
265
  "node": ">=18.0.0"
266
266
  },
267
+ "config": {
268
+ "commitizen": {
269
+ "path": "cz-conventional-changelog"
270
+ }
271
+ },
267
272
  "keywords": [
268
273
  "browser",
269
274
  "use",
@@ -285,7 +290,7 @@
285
290
  "@google/genai": "^1.40.0",
286
291
  "@modelcontextprotocol/sdk": "^1.27.1",
287
292
  "adm-zip": "^0.5.16",
288
- "axios": "^1.13.4",
293
+ "axios": "^1.16.0",
289
294
  "canvas": "^3.2.1",
290
295
  "dotenv": "^17.2.4",
291
296
  "eventemitter3": "^5.0.4",
@@ -318,6 +323,7 @@
318
323
  "@typescript-eslint/parser": "^8.54.0",
319
324
  "@vitest/coverage-v8": "^4.0.18",
320
325
  "commitizen": "^4.3.1",
326
+ "cz-conventional-changelog": "^3.3.0",
321
327
  "eslint": "^9.39.2",
322
328
  "eslint-config-prettier": "^10.1.8",
323
329
  "eslint-plugin-import": "^2.32.0",
@@ -326,14 +332,9 @@
326
332
  "prettier": "^3.8.1",
327
333
  "tsx": "^4.21.0",
328
334
  "typescript": "^5.9.3",
329
- "vite": "^7.3.1",
335
+ "vite": "^7.3.2",
330
336
  "vitest": "^4.0.18"
331
337
  },
332
- "config": {
333
- "commitizen": {
334
- "path": "cz-conventional-changelog"
335
- }
336
- },
337
338
  "scripts": {
338
339
  "build": "node scripts/clean-dist.mjs && tsc && node scripts/copy-dom-tree.mjs",
339
340
  "build:watch": "tsc --watch --preserveWatchOutput",
@@ -349,12 +350,12 @@
349
350
  "test:watch": "vitest --watch",
350
351
  "test:pack": "node scripts/smoke-pack.mjs",
351
352
  "check": "pnpm lint && pnpm typecheck && pnpm typecheck:test && pnpm test:unit && pnpm test:integration && pnpm test:e2e && pnpm test:pack",
353
+ "commit": "cz",
352
354
  "typecheck": "tsc --noEmit",
353
355
  "typecheck:test": "tsc -p tsconfig.test.json --noEmit",
354
356
  "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
355
357
  "format:check": "prettier --check \"src/**/*.ts\" \"test/**/*.ts\"",
356
358
  "prettier": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
357
- "commit": "pnpm exec git-cz",
358
359
  "postinstall": "playwright install chromium",
359
360
  "postinstall:ci": "playwright install --with-deps chromium"
360
361
  }