browser-use 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -17
- package/dist/agent/service.js +11 -2
- package/dist/cli.js +1 -1
- package/dist/config.js +1 -1
- package/dist/controller/registry/views.d.ts +2 -0
- package/dist/controller/registry/views.js +44 -17
- package/dist/observability.js +1 -1
- package/dist/utils.js +1 -1
- package/package.json +10 -9
package/README.md
CHANGED
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
## ✨ Features
|
|
24
24
|
|
|
25
25
|
- 🤖 **Autonomous Browser Control** — AI-driven navigation, clicking, typing, form filling, scrolling, and tab management
|
|
26
|
-
- 🧠 **
|
|
26
|
+
- 🧠 **15+ LLM Providers & Adapters** — OpenAI, Anthropic, Google Gemini, Azure, AWS Bedrock, Groq, Ollama, DeepSeek, OpenRouter, Mistral, Cerebras, Browser Use, LiteLLM, OCI Raw, Vercel, and custom providers
|
|
27
27
|
- 👁️ **Vision Support** — Screenshot-based understanding for visual web interactions
|
|
28
28
|
- 🔧 **45+ Built-in Actions** — Navigation, element interaction, scrolling, forms, tabs, content extraction, file I/O, and more
|
|
29
29
|
- 🧩 **Custom Actions** — Extensible registry with Zod schema validation, domain restrictions, and page filters
|
|
@@ -112,7 +112,7 @@ npx browser-use --mcp
|
|
|
112
112
|
| **BrowserSession** | Playwright wrapper — browser lifecycle, tab management, screenshots |
|
|
113
113
|
| **DomService** | Extracts interactive elements with indexed mapping for LLM consumption |
|
|
114
114
|
| **MessageManager** | Manages LLM conversation history with token optimization |
|
|
115
|
-
| **LLM Providers** | Unified `BaseChatModel` interface across
|
|
115
|
+
| **LLM Providers** | Unified `BaseChatModel` interface across 15+ providers and adapters |
|
|
116
116
|
|
|
117
117
|
### How It Works
|
|
118
118
|
|
|
@@ -125,19 +125,23 @@ npx browser-use --mcp
|
|
|
125
125
|
|
|
126
126
|
## 🔌 LLM Providers
|
|
127
127
|
|
|
128
|
-
| Provider | Import
|
|
129
|
-
| ----------------- |
|
|
130
|
-
| **OpenAI** | `browser-use/llm/openai`
|
|
131
|
-
| **Anthropic** | `browser-use/llm/anthropic`
|
|
132
|
-
| **Google Gemini** | `browser-use/llm/google`
|
|
133
|
-
| **Azure OpenAI** | `browser-use/llm/azure`
|
|
134
|
-
| **AWS Bedrock** | `browser-use/llm/aws`
|
|
135
|
-
| **Groq** | `browser-use/llm/groq`
|
|
136
|
-
| **Ollama** | `browser-use/llm/ollama`
|
|
137
|
-
| **DeepSeek** | `browser-use/llm/deepseek`
|
|
138
|
-
| **OpenRouter** | `browser-use/llm/openrouter`
|
|
139
|
-
| **Mistral** | `browser-use/llm/mistral`
|
|
140
|
-
| **Cerebras** | `browser-use/llm/cerebras`
|
|
128
|
+
| Provider | Import | Vision | Notes |
|
|
129
|
+
| ----------------- | ----------------------------- | ------ | --------------------------------------------- |
|
|
130
|
+
| **OpenAI** | `browser-use/llm/openai` | ✅ | Default provider, reasoning models (o1/o3/o4) |
|
|
131
|
+
| **Anthropic** | `browser-use/llm/anthropic` | ✅ | Prompt caching support |
|
|
132
|
+
| **Google Gemini** | `browser-use/llm/google` | ✅ | Extended thinking support |
|
|
133
|
+
| **Azure OpenAI** | `browser-use/llm/azure` | ✅ | Enterprise deployment |
|
|
134
|
+
| **AWS Bedrock** | `browser-use/llm/aws` | ✅ | Claude via AWS |
|
|
135
|
+
| **Groq** | `browser-use/llm/groq` | ❌ | Fastest inference |
|
|
136
|
+
| **Ollama** | `browser-use/llm/ollama` | ❌ | Local/self-hosted models |
|
|
137
|
+
| **DeepSeek** | `browser-use/llm/deepseek` | ❌ | Cost-effective |
|
|
138
|
+
| **OpenRouter** | `browser-use/llm/openrouter` | Varies | Multi-model routing |
|
|
139
|
+
| **Mistral** | `browser-use/llm/mistral` | Varies | Mistral models |
|
|
140
|
+
| **Cerebras** | `browser-use/llm/cerebras` | ❌ | Fast inference |
|
|
141
|
+
| **Browser Use** | `browser-use/llm/browser-use` | Varies | Hosted Browser Use LLM |
|
|
142
|
+
| **LiteLLM** | `browser-use/llm/litellm` | Varies | OpenAI-compatible LiteLLM gateway |
|
|
143
|
+
| **OCI Raw** | `browser-use/llm/oci-raw` | Varies | Oracle Cloud Generative AI |
|
|
144
|
+
| **Vercel** | `browser-use/llm/vercel` | Varies | Vercel AI Gateway / routed models |
|
|
141
145
|
|
|
142
146
|
<details>
|
|
143
147
|
<summary>Provider examples</summary>
|
|
@@ -210,6 +214,7 @@ const agent = new Agent({
|
|
|
210
214
|
### Custom Actions
|
|
211
215
|
|
|
212
216
|
```typescript
|
|
217
|
+
import fs from 'node:fs';
|
|
213
218
|
import { Controller, ActionResult } from 'browser-use';
|
|
214
219
|
import { z } from 'zod';
|
|
215
220
|
|
|
@@ -345,7 +350,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_
|
|
|
345
350
|
}
|
|
346
351
|
```
|
|
347
352
|
|
|
348
|
-
|
|
353
|
+
Core MCP tools include `retry_with_browser_use_agent`, `browser_navigate`, `browser_click`, `browser_type`, `browser_get_state`, `browser_extract_content`, `browser_scroll`, `browser_go_back`, `browser_list_tabs`, `browser_switch_tab`, `browser_close_tab`, `browser_list_sessions`, `browser_close_session`, and `browser_close_all`. The server also exposes registered controller actions as additional MCP tools.
|
|
349
354
|
|
|
350
355
|
> See [MCP Server Guide](./docs/MCP_SERVER.md) for more details.
|
|
351
356
|
|
|
@@ -423,4 +428,4 @@ pnpm exec tsx examples/simple-search.ts
|
|
|
423
428
|
|
|
424
429
|
## 📄 License
|
|
425
430
|
|
|
426
|
-
[MIT](./LICENSE)
|
|
431
|
+
[MIT](./LICENSE)
|
package/dist/agent/service.js
CHANGED
|
@@ -34,7 +34,7 @@ import { AgentTelemetryEvent } from '../telemetry/views.js';
|
|
|
34
34
|
import { TokenCost } from '../tokens/service.js';
|
|
35
35
|
import { construct_judge_messages, construct_simple_judge_messages, } from './judge.js';
|
|
36
36
|
import { CloudSkillService, MissingCookieException, build_skill_parameters_schema, get_skill_slug, } from '../skills/index.js';
|
|
37
|
-
loadEnv();
|
|
37
|
+
loadEnv({ quiet: true });
|
|
38
38
|
const logger = createLogger('browser_use.agent');
|
|
39
39
|
const URL_PATTERN = /https?:\/\/[^\s<>"']+|www\.[^\s<>"']+|[^\s<>"']+\.[a-z]{2,}(?:\/[^\s<>"']*)?/gi;
|
|
40
40
|
export const log_response = (response, registry, logInstance = logger) => {
|
|
@@ -3886,7 +3886,16 @@ export class Agent {
|
|
|
3886
3886
|
{});
|
|
3887
3887
|
const paramsResult = actionInfo.paramSchema.safeParse(rawParams);
|
|
3888
3888
|
if (!paramsResult.success) {
|
|
3889
|
-
|
|
3889
|
+
// Surface a human-readable issue list (zod v4 `prettifyError`) plus
|
|
3890
|
+
// a corrective hint, rather than the default JSON dump of `.issues`.
|
|
3891
|
+
// This Error propagates → `_handle_step_error` writes it into
|
|
3892
|
+
// `state.last_result` → `create_state_messages` injects it into the
|
|
3893
|
+
// next LLM turn, so the model knows exactly what shape it got wrong.
|
|
3894
|
+
const pretty = z.prettifyError(paramsResult.error);
|
|
3895
|
+
const sentParams = JSON.stringify(rawParams);
|
|
3896
|
+
throw new Error(`Schema validation failed for action '${requestedActionName}'. ` +
|
|
3897
|
+
`You sent: ${sentParams}. Issues:\n${pretty}\n` +
|
|
3898
|
+
`Please retry with parameters matching the action's schema exactly.`);
|
|
3890
3899
|
}
|
|
3891
3900
|
normalizedActions.push(new modelForStep({
|
|
3892
3901
|
[actionName]: paramsResult.data,
|
package/dist/cli.js
CHANGED
|
@@ -33,7 +33,7 @@ import { setupLogging } from './logging-config.js';
|
|
|
33
33
|
import { get_tunnel_manager } from './skill-cli/tunnel.js';
|
|
34
34
|
import { DeviceAuthClient, save_cloud_api_token } from './sync/auth.js';
|
|
35
35
|
import dotenv from 'dotenv';
|
|
36
|
-
dotenv.config();
|
|
36
|
+
dotenv.config({ quiet: true });
|
|
37
37
|
const require = createRequire(import.meta.url);
|
|
38
38
|
const CLI_PROVIDER_ALIASES = {
|
|
39
39
|
openai: 'openai',
|
package/dist/config.js
CHANGED
|
@@ -4,7 +4,7 @@ import path from 'node:path';
|
|
|
4
4
|
import { randomUUID } from 'node:crypto';
|
|
5
5
|
import { config as loadEnv } from 'dotenv';
|
|
6
6
|
import { createLogger } from './logging-config.js';
|
|
7
|
-
loadEnv();
|
|
7
|
+
loadEnv({ quiet: true });
|
|
8
8
|
const logger = createLogger('browser_use.config');
|
|
9
9
|
const expand_user = (value) => value.replace(/^~(?=$|\/|\\)/, os.homedir());
|
|
10
10
|
const resolve_path = (value) => path.resolve(expand_user(value));
|
package/dist/controller/registry/views.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ export type ActionHandler = (...args: any[]) => Promise<unknown> | unknown;
|
|
|
4
4
|
type BrowserSession = unknown;
|
|
5
5
|
type BaseChatModel = unknown;
|
|
6
6
|
type FileSystem = unknown;
|
|
7
|
+
export declare function renderParamsJsonSchema(schema: ZodTypeAny, skipKeys: Set<string>): Record<string, unknown>;
|
|
7
8
|
export declare class RegisteredAction {
|
|
8
9
|
readonly name: string;
|
|
9
10
|
readonly description: string;
|
|
@@ -13,6 +14,7 @@ export declare class RegisteredAction {
|
|
|
13
14
|
readonly pageFilter: ((page: Page) => boolean) | null;
|
|
14
15
|
readonly terminates_sequence: boolean;
|
|
15
16
|
constructor(name: string, description: string, handler: ActionHandler, paramSchema: ZodTypeAny, domains?: string[] | null, pageFilter?: ((page: Page) => boolean) | null, terminates_sequence?: boolean);
|
|
17
|
+
getPromptJsonSchema(): Record<string, unknown>;
|
|
16
18
|
promptDescription(): string;
|
|
17
19
|
}
|
|
18
20
|
export declare class ActionModel {
|
package/dist/controller/registry/views.js
CHANGED
|
@@ -15,6 +15,38 @@ const getPageUrl = (page) => {
|
|
|
15
15
|
}
|
|
16
16
|
return candidate ?? '';
|
|
17
17
|
};
|
|
18
|
+
// Render an action's param schema as compact JSON Schema for the LLM prompt.
|
|
19
|
+
// Replaces a prior raw dump of zod's private `_def` AST, which leaked
|
|
20
|
+
// internal keys like `innerType`/`defaultValue` and confused the LLM into
|
|
21
|
+
// copying default booleans into numeric fields (see scroll.num_pages bug).
|
|
22
|
+
export function renderParamsJsonSchema(schema, skipKeys) {
|
|
23
|
+
// `io: 'input'` makes zod render the *input* shape (what the LLM is
|
|
24
|
+
// expected to provide). Without it, fields with `.default(...)` get marked
|
|
25
|
+
// as required in the JSON Schema (because the parsed *output* always has
|
|
26
|
+
// them), which misleads the model — e.g. scroll.num_pages, done.success.
|
|
27
|
+
const raw = z.toJSONSchema(schema, {
|
|
28
|
+
io: 'input',
|
|
29
|
+
unrepresentable: 'any',
|
|
30
|
+
});
|
|
31
|
+
// Strip dialect noise the LLM doesn't need.
|
|
32
|
+
delete raw.$schema;
|
|
33
|
+
const properties = raw.properties ?? {};
|
|
34
|
+
const filteredProps = {};
|
|
35
|
+
for (const [key, value] of Object.entries(properties)) {
|
|
36
|
+
if (skipKeys.has(key)) {
|
|
37
|
+
continue;
|
|
38
|
+
}
|
|
39
|
+
filteredProps[key] = value;
|
|
40
|
+
}
|
|
41
|
+
raw.properties = filteredProps;
|
|
42
|
+
if (Array.isArray(raw.required)) {
|
|
43
|
+
raw.required = raw.required.filter((key) => typeof key === 'string' && !skipKeys.has(key));
|
|
44
|
+
if (raw.required.length === 0) {
|
|
45
|
+
delete raw.required;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
return raw;
|
|
49
|
+
}
|
|
18
50
|
export class RegisteredAction {
|
|
19
51
|
name;
|
|
20
52
|
description;
|
|
@@ -32,10 +64,12 @@ export class RegisteredAction {
|
|
|
32
64
|
this.pageFilter = pageFilter;
|
|
33
65
|
this.terminates_sequence = terminates_sequence;
|
|
34
66
|
}
|
|
35
|
-
|
|
67
|
+
// Returns the JSON Schema rendered for the LLM prompt, with the same
|
|
68
|
+
// skipKeys logic applied as in `promptDescription`. Exposed so tooling
|
|
69
|
+
// (e.g. scripts/dump-schema.ts) can exercise the exact code path the
|
|
70
|
+
// model sees.
|
|
71
|
+
getPromptJsonSchema() {
|
|
36
72
|
const skipKeys = new Set(['title']);
|
|
37
|
-
let description = `${this.description}: \n`;
|
|
38
|
-
description += `{${this.name}: `;
|
|
39
73
|
const schemaShape = (this.paramSchema instanceof z.ZodObject && this.paramSchema.shape) ||
|
|
40
74
|
('shape' in this.paramSchema ? this.paramSchema.shape : null);
|
|
41
75
|
const hideStructuredDoneSuccess = Boolean(this.name === 'done' &&
|
|
@@ -46,26 +80,19 @@ export class RegisteredAction {
|
|
|
46
80
|
if (hideStructuredDoneSuccess) {
|
|
47
81
|
skipKeys.add('success');
|
|
48
82
|
}
|
|
49
|
-
const hideExtractOutputSchema = Boolean(this.name === 'extract_structured_data' &&
|
|
83
|
+
const hideExtractOutputSchema = Boolean((this.name === 'extract_structured_data' || this.name === 'extract') &&
|
|
50
84
|
schemaShape &&
|
|
51
85
|
typeof schemaShape === 'object' &&
|
|
52
86
|
Object.prototype.hasOwnProperty.call(schemaShape, 'output_schema'));
|
|
53
87
|
if (hideExtractOutputSchema) {
|
|
54
88
|
skipKeys.add('output_schema');
|
|
55
89
|
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
return [key, cleanEntries];
|
|
63
|
-
}));
|
|
64
|
-
description += JSON.stringify(props);
|
|
65
|
-
}
|
|
66
|
-
else {
|
|
67
|
-
description += '{}';
|
|
68
|
-
}
|
|
90
|
+
return renderParamsJsonSchema(this.paramSchema, skipKeys);
|
|
91
|
+
}
|
|
92
|
+
promptDescription() {
|
|
93
|
+
let description = `${this.description}: \n`;
|
|
94
|
+
description += `{${this.name}: `;
|
|
95
|
+
description += JSON.stringify(this.getPromptJsonSchema());
|
|
69
96
|
description += '}';
|
|
70
97
|
return description;
|
|
71
98
|
}
|
package/dist/observability.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createRequire } from 'node:module';
|
|
2
2
|
import { config as loadEnv } from 'dotenv';
|
|
3
3
|
import { createLogger } from './logging-config.js';
|
|
4
|
-
loadEnv();
|
|
4
|
+
loadEnv({ quiet: true });
|
|
5
5
|
const require = createRequire(import.meta.url);
|
|
6
6
|
const logger = createLogger('browser_use.observability');
|
|
7
7
|
let lmnrObserve = null;
|
package/dist/utils.js
CHANGED
|
@@ -10,7 +10,7 @@ import { fileURLToPath } from 'node:url';
|
|
|
10
10
|
import { config as loadEnv } from 'dotenv';
|
|
11
11
|
import * as minimatchModule from 'minimatch';
|
|
12
12
|
import { createLogger } from './logging-config.js';
|
|
13
|
-
loadEnv();
|
|
13
|
+
loadEnv({ quiet: true });
|
|
14
14
|
const logger = createLogger('browser_use.utils');
|
|
15
15
|
let _exiting = false;
|
|
16
16
|
const minimatch = (minimatchModule.minimatch ??
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "browser-use",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "A TypeScript-first library for programmatic browser control, designed for building AI-powered web agents.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -264,6 +264,11 @@
|
|
|
264
264
|
"engines": {
|
|
265
265
|
"node": ">=18.0.0"
|
|
266
266
|
},
|
|
267
|
+
"config": {
|
|
268
|
+
"commitizen": {
|
|
269
|
+
"path": "cz-conventional-changelog"
|
|
270
|
+
}
|
|
271
|
+
},
|
|
267
272
|
"keywords": [
|
|
268
273
|
"browser",
|
|
269
274
|
"use",
|
|
@@ -285,7 +290,7 @@
|
|
|
285
290
|
"@google/genai": "^1.40.0",
|
|
286
291
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
287
292
|
"adm-zip": "^0.5.16",
|
|
288
|
-
"axios": "^1.
|
|
293
|
+
"axios": "^1.16.0",
|
|
289
294
|
"canvas": "^3.2.1",
|
|
290
295
|
"dotenv": "^17.2.4",
|
|
291
296
|
"eventemitter3": "^5.0.4",
|
|
@@ -318,6 +323,7 @@
|
|
|
318
323
|
"@typescript-eslint/parser": "^8.54.0",
|
|
319
324
|
"@vitest/coverage-v8": "^4.0.18",
|
|
320
325
|
"commitizen": "^4.3.1",
|
|
326
|
+
"cz-conventional-changelog": "^3.3.0",
|
|
321
327
|
"eslint": "^9.39.2",
|
|
322
328
|
"eslint-config-prettier": "^10.1.8",
|
|
323
329
|
"eslint-plugin-import": "^2.32.0",
|
|
@@ -326,14 +332,9 @@
|
|
|
326
332
|
"prettier": "^3.8.1",
|
|
327
333
|
"tsx": "^4.21.0",
|
|
328
334
|
"typescript": "^5.9.3",
|
|
329
|
-
"vite": "^7.3.
|
|
335
|
+
"vite": "^7.3.2",
|
|
330
336
|
"vitest": "^4.0.18"
|
|
331
337
|
},
|
|
332
|
-
"config": {
|
|
333
|
-
"commitizen": {
|
|
334
|
-
"path": "cz-conventional-changelog"
|
|
335
|
-
}
|
|
336
|
-
},
|
|
337
338
|
"scripts": {
|
|
338
339
|
"build": "node scripts/clean-dist.mjs && tsc && node scripts/copy-dom-tree.mjs",
|
|
339
340
|
"build:watch": "tsc --watch --preserveWatchOutput",
|
|
@@ -349,12 +350,12 @@
|
|
|
349
350
|
"test:watch": "vitest --watch",
|
|
350
351
|
"test:pack": "node scripts/smoke-pack.mjs",
|
|
351
352
|
"check": "pnpm lint && pnpm typecheck && pnpm typecheck:test && pnpm test:unit && pnpm test:integration && pnpm test:e2e && pnpm test:pack",
|
|
353
|
+
"commit": "cz",
|
|
352
354
|
"typecheck": "tsc --noEmit",
|
|
353
355
|
"typecheck:test": "tsc -p tsconfig.test.json --noEmit",
|
|
354
356
|
"format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
|
|
355
357
|
"format:check": "prettier --check \"src/**/*.ts\" \"test/**/*.ts\"",
|
|
356
358
|
"prettier": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
|
|
357
|
-
"commit": "pnpm exec git-cz",
|
|
358
359
|
"postinstall": "playwright install chromium",
|
|
359
360
|
"postinstall:ci": "playwright install --with-deps chromium"
|
|
360
361
|
}
|