browser-use 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200):
  1. package/LICENSE +21 -0
  2. package/README.md +761 -0
  3. package/dist/agent/cloud-events.d.ts +264 -0
  4. package/dist/agent/cloud-events.js +318 -0
  5. package/dist/agent/gif.d.ts +15 -0
  6. package/dist/agent/gif.js +215 -0
  7. package/dist/agent/index.d.ts +8 -0
  8. package/dist/agent/index.js +8 -0
  9. package/dist/agent/message-manager/service.d.ts +30 -0
  10. package/dist/agent/message-manager/service.js +208 -0
  11. package/dist/agent/message-manager/utils.d.ts +2 -0
  12. package/dist/agent/message-manager/utils.js +41 -0
  13. package/dist/agent/message-manager/views.d.ts +26 -0
  14. package/dist/agent/message-manager/views.js +73 -0
  15. package/dist/agent/prompts.d.ts +52 -0
  16. package/dist/agent/prompts.js +259 -0
  17. package/dist/agent/service.d.ts +290 -0
  18. package/dist/agent/service.js +2200 -0
  19. package/dist/agent/views.d.ts +741 -0
  20. package/dist/agent/views.js +537 -0
  21. package/dist/browser/browser.d.ts +7 -0
  22. package/dist/browser/browser.js +5 -0
  23. package/dist/browser/context.d.ts +8 -0
  24. package/dist/browser/context.js +4 -0
  25. package/dist/browser/dvd-screensaver.d.ts +101 -0
  26. package/dist/browser/dvd-screensaver.js +270 -0
  27. package/dist/browser/extensions.d.ts +63 -0
  28. package/dist/browser/extensions.js +359 -0
  29. package/dist/browser/index.d.ts +10 -0
  30. package/dist/browser/index.js +9 -0
  31. package/dist/browser/playwright-manager.d.ts +47 -0
  32. package/dist/browser/playwright-manager.js +146 -0
  33. package/dist/browser/profile.d.ts +196 -0
  34. package/dist/browser/profile.js +815 -0
  35. package/dist/browser/session.d.ts +505 -0
  36. package/dist/browser/session.js +3409 -0
  37. package/dist/browser/types.d.ts +1184 -0
  38. package/dist/browser/types.js +1 -0
  39. package/dist/browser/utils.d.ts +1 -0
  40. package/dist/browser/utils.js +19 -0
  41. package/dist/browser/views.d.ts +78 -0
  42. package/dist/browser/views.js +72 -0
  43. package/dist/cli.d.ts +2 -0
  44. package/dist/cli.js +44 -0
  45. package/dist/config.d.ts +108 -0
  46. package/dist/config.js +430 -0
  47. package/dist/controller/index.d.ts +3 -0
  48. package/dist/controller/index.js +3 -0
  49. package/dist/controller/registry/index.d.ts +2 -0
  50. package/dist/controller/registry/index.js +2 -0
  51. package/dist/controller/registry/service.d.ts +45 -0
  52. package/dist/controller/registry/service.js +184 -0
  53. package/dist/controller/registry/views.d.ts +55 -0
  54. package/dist/controller/registry/views.js +174 -0
  55. package/dist/controller/service.d.ts +49 -0
  56. package/dist/controller/service.js +1176 -0
  57. package/dist/controller/views.d.ts +241 -0
  58. package/dist/controller/views.js +88 -0
  59. package/dist/dom/clickable-element-processor/service.d.ts +11 -0
  60. package/dist/dom/clickable-element-processor/service.js +60 -0
  61. package/dist/dom/dom_tree/index.js +1400 -0
  62. package/dist/dom/history-tree-processor/service.d.ts +14 -0
  63. package/dist/dom/history-tree-processor/service.js +75 -0
  64. package/dist/dom/history-tree-processor/view.d.ts +54 -0
  65. package/dist/dom/history-tree-processor/view.js +56 -0
  66. package/dist/dom/playground/extraction.d.ts +19 -0
  67. package/dist/dom/playground/extraction.js +187 -0
  68. package/dist/dom/playground/process-dom.d.ts +1 -0
  69. package/dist/dom/playground/process-dom.js +5 -0
  70. package/dist/dom/playground/test-accessibility.d.ts +44 -0
  71. package/dist/dom/playground/test-accessibility.js +111 -0
  72. package/dist/dom/service.d.ts +19 -0
  73. package/dist/dom/service.js +227 -0
  74. package/dist/dom/utils.d.ts +1 -0
  75. package/dist/dom/utils.js +6 -0
  76. package/dist/dom/views.d.ts +61 -0
  77. package/dist/dom/views.js +247 -0
  78. package/dist/event-bus.d.ts +11 -0
  79. package/dist/event-bus.js +19 -0
  80. package/dist/exceptions.d.ts +10 -0
  81. package/dist/exceptions.js +22 -0
  82. package/dist/filesystem/file-system.d.ts +68 -0
  83. package/dist/filesystem/file-system.js +412 -0
  84. package/dist/filesystem/index.d.ts +1 -0
  85. package/dist/filesystem/index.js +1 -0
  86. package/dist/index.d.ts +31 -0
  87. package/dist/index.js +33 -0
  88. package/dist/integrations/gmail/actions.d.ts +12 -0
  89. package/dist/integrations/gmail/actions.js +113 -0
  90. package/dist/integrations/gmail/index.d.ts +2 -0
  91. package/dist/integrations/gmail/index.js +2 -0
  92. package/dist/integrations/gmail/service.d.ts +61 -0
  93. package/dist/integrations/gmail/service.js +260 -0
  94. package/dist/llm/anthropic/chat.d.ts +28 -0
  95. package/dist/llm/anthropic/chat.js +126 -0
  96. package/dist/llm/anthropic/index.d.ts +2 -0
  97. package/dist/llm/anthropic/index.js +2 -0
  98. package/dist/llm/anthropic/serializer.d.ts +68 -0
  99. package/dist/llm/anthropic/serializer.js +285 -0
  100. package/dist/llm/aws/chat-anthropic.d.ts +61 -0
  101. package/dist/llm/aws/chat-anthropic.js +176 -0
  102. package/dist/llm/aws/chat-bedrock.d.ts +15 -0
  103. package/dist/llm/aws/chat-bedrock.js +80 -0
  104. package/dist/llm/aws/index.d.ts +3 -0
  105. package/dist/llm/aws/index.js +3 -0
  106. package/dist/llm/aws/serializer.d.ts +5 -0
  107. package/dist/llm/aws/serializer.js +68 -0
  108. package/dist/llm/azure/chat.d.ts +15 -0
  109. package/dist/llm/azure/chat.js +83 -0
  110. package/dist/llm/azure/index.d.ts +1 -0
  111. package/dist/llm/azure/index.js +1 -0
  112. package/dist/llm/base.d.ts +16 -0
  113. package/dist/llm/base.js +1 -0
  114. package/dist/llm/deepseek/chat.d.ts +15 -0
  115. package/dist/llm/deepseek/chat.js +51 -0
  116. package/dist/llm/deepseek/index.d.ts +2 -0
  117. package/dist/llm/deepseek/index.js +2 -0
  118. package/dist/llm/deepseek/serializer.d.ts +6 -0
  119. package/dist/llm/deepseek/serializer.js +57 -0
  120. package/dist/llm/exceptions.d.ts +10 -0
  121. package/dist/llm/exceptions.js +18 -0
  122. package/dist/llm/google/chat.d.ts +20 -0
  123. package/dist/llm/google/chat.js +144 -0
  124. package/dist/llm/google/index.d.ts +2 -0
  125. package/dist/llm/google/index.js +2 -0
  126. package/dist/llm/google/serializer.d.ts +6 -0
  127. package/dist/llm/google/serializer.js +64 -0
  128. package/dist/llm/groq/chat.d.ts +15 -0
  129. package/dist/llm/groq/chat.js +52 -0
  130. package/dist/llm/groq/index.d.ts +3 -0
  131. package/dist/llm/groq/index.js +3 -0
  132. package/dist/llm/groq/parser.d.ts +32 -0
  133. package/dist/llm/groq/parser.js +189 -0
  134. package/dist/llm/groq/serializer.d.ts +6 -0
  135. package/dist/llm/groq/serializer.js +56 -0
  136. package/dist/llm/messages.d.ts +77 -0
  137. package/dist/llm/messages.js +157 -0
  138. package/dist/llm/ollama/chat.d.ts +15 -0
  139. package/dist/llm/ollama/chat.js +77 -0
  140. package/dist/llm/ollama/index.d.ts +2 -0
  141. package/dist/llm/ollama/index.js +2 -0
  142. package/dist/llm/ollama/serializer.d.ts +6 -0
  143. package/dist/llm/ollama/serializer.js +53 -0
  144. package/dist/llm/openai/chat.d.ts +38 -0
  145. package/dist/llm/openai/chat.js +174 -0
  146. package/dist/llm/openai/index.d.ts +3 -0
  147. package/dist/llm/openai/index.js +3 -0
  148. package/dist/llm/openai/like.d.ts +17 -0
  149. package/dist/llm/openai/like.js +19 -0
  150. package/dist/llm/openai/serializer.d.ts +6 -0
  151. package/dist/llm/openai/serializer.js +57 -0
  152. package/dist/llm/openrouter/chat.d.ts +15 -0
  153. package/dist/llm/openrouter/chat.js +74 -0
  154. package/dist/llm/openrouter/index.d.ts +2 -0
  155. package/dist/llm/openrouter/index.js +2 -0
  156. package/dist/llm/openrouter/serializer.d.ts +3 -0
  157. package/dist/llm/openrouter/serializer.js +3 -0
  158. package/dist/llm/schema.d.ts +6 -0
  159. package/dist/llm/schema.js +77 -0
  160. package/dist/llm/views.d.ts +15 -0
  161. package/dist/llm/views.js +12 -0
  162. package/dist/logging-config.d.ts +25 -0
  163. package/dist/logging-config.js +89 -0
  164. package/dist/mcp/client.d.ts +142 -0
  165. package/dist/mcp/client.js +638 -0
  166. package/dist/mcp/controller.d.ts +6 -0
  167. package/dist/mcp/controller.js +38 -0
  168. package/dist/mcp/index.d.ts +3 -0
  169. package/dist/mcp/index.js +3 -0
  170. package/dist/mcp/server.d.ts +134 -0
  171. package/dist/mcp/server.js +759 -0
  172. package/dist/observability-decorators.d.ts +158 -0
  173. package/dist/observability-decorators.js +286 -0
  174. package/dist/observability.d.ts +23 -0
  175. package/dist/observability.js +58 -0
  176. package/dist/screenshots/index.d.ts +1 -0
  177. package/dist/screenshots/index.js +1 -0
  178. package/dist/screenshots/service.d.ts +6 -0
  179. package/dist/screenshots/service.js +28 -0
  180. package/dist/sync/auth.d.ts +27 -0
  181. package/dist/sync/auth.js +205 -0
  182. package/dist/sync/index.d.ts +2 -0
  183. package/dist/sync/index.js +2 -0
  184. package/dist/sync/service.d.ts +21 -0
  185. package/dist/sync/service.js +146 -0
  186. package/dist/telemetry/index.d.ts +2 -0
  187. package/dist/telemetry/index.js +2 -0
  188. package/dist/telemetry/service.d.ts +12 -0
  189. package/dist/telemetry/service.js +85 -0
  190. package/dist/telemetry/views.d.ts +112 -0
  191. package/dist/telemetry/views.js +112 -0
  192. package/dist/tokens/index.d.ts +2 -0
  193. package/dist/tokens/index.js +2 -0
  194. package/dist/tokens/service.d.ts +35 -0
  195. package/dist/tokens/service.js +423 -0
  196. package/dist/tokens/views.d.ts +58 -0
  197. package/dist/tokens/views.js +1 -0
  198. package/dist/utils.d.ts +128 -0
  199. package/dist/utils.js +529 -0
  200. package/package.json +94 -5
@@ -0,0 +1,759 @@
1
+ /**
2
+ * MCP Server for browser-use - exposes browser automation capabilities via Model Context Protocol.
3
+ *
4
+ * This server provides tools for:
5
+ * - Running autonomous browser tasks with an AI agent
6
+ * - Direct browser control (navigation, clicking, typing, etc.)
7
+ * - Content extraction from web pages
8
+ * - File system operations
9
+ *
10
+ * Usage:
11
+ * npx browser-use --mcp
12
+ *
13
+ * Or as an MCP server in Claude Desktop or other MCP clients:
14
+ * {
15
+ * "mcpServers": {
16
+ * "browser-use": {
17
+ * "command": "npx",
18
+ * "args": ["browser-use", "--mcp"],
19
+ * "env": {
20
+ * "OPENAI_API_KEY": "sk-proj-1234567890"
21
+ * }
22
+ * }
23
+ * }
24
+ * }
25
+ */
26
+ import os from 'node:os';
27
+ import path from 'node:path';
28
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
29
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
30
+ import { CallToolRequestSchema, ListToolsRequestSchema, ListPromptsRequestSchema, GetPromptRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
31
+ import { z } from 'zod';
32
+ import { zodToJsonSchema } from 'zod-to-json-schema';
33
+ import { createLogger } from '../logging-config.js';
34
+ import { Controller as DefaultController } from '../controller/service.js';
35
+ import { Agent } from '../agent/service.js';
36
+ import { BrowserSession } from '../browser/session.js';
37
+ import { BrowserProfile } from '../browser/profile.js';
38
+ import { FileSystem } from '../filesystem/file-system.js';
39
+ import { ChatOpenAI } from '../llm/openai/chat.js';
40
+ import { load_browser_use_config, get_default_llm, get_default_profile, } from '../config.js';
41
+ import { productTelemetry } from '../telemetry/service.js';
42
+ import { MCPServerTelemetryEvent } from '../telemetry/views.js';
43
+ import { get_browser_use_version } from '../utils.js';
44
// Send console.log/info/warn to stderr so that stray console output cannot
// interfere with the JSON-RPC traffic. References to the original console
// methods are captured first, before anything is reassigned.
const originalLog = console.log;
const originalInfo = console.info;
const originalWarn = console.warn;
const originalError = console.error;
for (const method of ['log', 'info', 'warn']) {
    // console.error is looked up at call time, exactly as in a direct wrapper.
    console[method] = (...args) => console.error(...args);
}
const logger = createLogger('browser_use.mcp.server');
53
+ export class MCPServer {
54
+ server;
55
+ tools = {};
56
+ prompts = new Map();
57
+ config;
58
+ browserSession = null;
59
+ controller = null;
60
+ llm = null;
61
+ fileSystem = null;
62
+ startTime;
63
+ isRunning = false;
64
+ toolExecutionCount = 0;
65
+ errorCount = 0;
66
+ abortController = null;
67
+ constructor(name, version) {
68
+ this.server = new Server({
69
+ name,
70
+ version,
71
+ }, {
72
+ capabilities: {
73
+ tools: {},
74
+ prompts: {},
75
+ },
76
+ });
77
+ this.config = load_browser_use_config();
78
+ this.startTime = Date.now() / 1000;
79
+ this.setupHandlers();
80
+ this.registerDefaultPrompts();
81
+ this.controller = new DefaultController();
82
+ this.registerControllerActions(this.controller);
83
+ this.registerCoreBrowserTools();
84
+ }
85
+ resolvePath(input) {
86
+ const expanded = input.replace(/^~(?=$|\/|\\)/, os.homedir());
87
+ return path.resolve(expanded);
88
+ }
89
+ getDefaultProfileConfig() {
90
+ const profile = get_default_profile(this.config);
91
+ return profile && typeof profile === 'object' ? { ...profile } : {};
92
+ }
93
+ getDefaultLlmConfig() {
94
+ const llm = get_default_llm(this.config);
95
+ return llm && typeof llm === 'object' ? { ...llm } : {};
96
+ }
97
+ sanitizeProfileConfig(profileConfig) {
98
+ const sanitized = { ...profileConfig };
99
+ delete sanitized.id;
100
+ delete sanitized.default;
101
+ delete sanitized.created_at;
102
+ return sanitized;
103
+ }
104
+ buildDirectSessionProfile(profileConfig) {
105
+ const merged = {
106
+ downloads_path: '~/Downloads/browser-use-mcp',
107
+ wait_between_actions: 0.5,
108
+ keep_alive: true,
109
+ user_data_dir: '~/.config/browseruse/profiles/default',
110
+ is_mobile: false,
111
+ device_scale_factor: 1.0,
112
+ disable_security: false,
113
+ headless: false,
114
+ ...this.sanitizeProfileConfig(profileConfig),
115
+ };
116
+ if (typeof merged.user_data_dir === 'string') {
117
+ merged.user_data_dir = this.resolvePath(merged.user_data_dir);
118
+ }
119
+ if (typeof merged.downloads_path === 'string') {
120
+ merged.downloads_path = this.resolvePath(merged.downloads_path);
121
+ }
122
+ if (Array.isArray(merged.allowed_domains)) {
123
+ merged.allowed_domains = merged.allowed_domains
124
+ .map((entry) => String(entry).trim())
125
+ .filter(Boolean);
126
+ }
127
+ return new BrowserProfile(merged);
128
+ }
129
+ buildRetryProfile(profileConfig, allowedDomains) {
130
+ const merged = {
131
+ ...this.sanitizeProfileConfig(profileConfig),
132
+ };
133
+ if (allowedDomains !== undefined) {
134
+ merged.allowed_domains = allowedDomains;
135
+ }
136
+ if (merged.keep_alive == null) {
137
+ merged.keep_alive = false;
138
+ }
139
+ if (typeof merged.user_data_dir === 'string') {
140
+ merged.user_data_dir = this.resolvePath(merged.user_data_dir);
141
+ }
142
+ if (typeof merged.downloads_path === 'string') {
143
+ merged.downloads_path = this.resolvePath(merged.downloads_path);
144
+ }
145
+ if (Array.isArray(merged.allowed_domains)) {
146
+ merged.allowed_domains = merged.allowed_domains
147
+ .map((entry) => String(entry).trim())
148
+ .filter(Boolean);
149
+ }
150
+ return new BrowserProfile(merged);
151
+ }
152
+ initializeLlmForDirectTools() {
153
+ if (this.llm) {
154
+ return;
155
+ }
156
+ const llmConfig = this.getDefaultLlmConfig();
157
+ const configuredApiKey = typeof llmConfig.api_key === 'string' ? llmConfig.api_key.trim() : '';
158
+ const envApiKey = typeof process.env.OPENAI_API_KEY === 'string'
159
+ ? process.env.OPENAI_API_KEY.trim()
160
+ : '';
161
+ const apiKey = configuredApiKey || envApiKey;
162
+ if (!apiKey) {
163
+ return;
164
+ }
165
+ const model = typeof llmConfig.model === 'string' && llmConfig.model.trim()
166
+ ? llmConfig.model.trim()
167
+ : 'gpt-4o-mini';
168
+ const temperature = typeof llmConfig.temperature === 'number' ? llmConfig.temperature : 0.7;
169
+ this.llm = new ChatOpenAI({
170
+ model,
171
+ apiKey,
172
+ temperature,
173
+ });
174
+ }
175
+ initializeFileSystem(profileConfig) {
176
+ if (this.fileSystem) {
177
+ return;
178
+ }
179
+ const configuredPath = typeof profileConfig.file_system_path === 'string'
180
+ ? profileConfig.file_system_path
181
+ : '~/.browser-use-mcp';
182
+ this.fileSystem = new FileSystem(this.resolvePath(configuredPath));
183
+ }
184
+ formatRetryResult(history) {
185
+ const results = [];
186
+ const steps = Array.isArray(history?.history) || typeof history?.number_of_steps === 'function'
187
+ ? typeof history?.number_of_steps === 'function'
188
+ ? history.number_of_steps()
189
+ : history.history.length
190
+ : 0;
191
+ results.push(`Task completed in ${steps} steps`);
192
+ results.push(`Success: ${String(history?.is_successful?.())}`);
193
+ const finalResult = history?.final_result?.();
194
+ if (finalResult) {
195
+ results.push(`\nFinal result:\n${finalResult}`);
196
+ }
197
+ const errors = Array.isArray(history?.errors?.())
198
+ ? history.errors().filter((entry) => entry != null)
199
+ : [];
200
+ if (errors.length > 0) {
201
+ results.push(`\nErrors encountered:\n${JSON.stringify(errors, null, 2)}`);
202
+ }
203
+ const urls = Array.isArray(history?.urls?.())
204
+ ? history
205
+ .urls()
206
+ .filter((entry) => entry != null)
207
+ .map((entry) => String(entry))
208
+ : [];
209
+ if (urls.length > 0) {
210
+ results.push(`\nURLs visited: ${urls.join(', ')}`);
211
+ }
212
+ return results.join('\n');
213
+ }
214
+ setupHandlers() {
215
+ // List available tools
216
+ this.server.setRequestHandler(ListToolsRequestSchema, async () => {
217
+ return {
218
+ tools: Object.entries(this.tools).map(([name, tool]) => ({
219
+ name,
220
+ description: tool.description,
221
+ inputSchema: tool.inputSchema,
222
+ })),
223
+ };
224
+ });
225
+ // Execute tool
226
+ this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
227
+ const startTime = Date.now() / 1000;
228
+ let errorMsg = null;
229
+ try {
230
+ const tool = this.tools[request.params.name];
231
+ if (!tool) {
232
+ throw new Error(`Tool not found: ${request.params.name}`);
233
+ }
234
+ logger.debug(`Executing tool: ${request.params.name}`);
235
+ this.toolExecutionCount++;
236
+ const result = await tool.handler(request.params.arguments || {});
237
+ return {
238
+ content: [
239
+ {
240
+ type: 'text',
241
+ text: typeof result === 'string'
242
+ ? result
243
+ : JSON.stringify(result, null, 2),
244
+ },
245
+ ],
246
+ };
247
+ }
248
+ catch (error) {
249
+ this.errorCount++;
250
+ errorMsg = error instanceof Error ? error.message : String(error);
251
+ logger.error(`Tool execution failed: ${errorMsg}`);
252
+ return {
253
+ content: [
254
+ {
255
+ type: 'text',
256
+ text: `Error: ${errorMsg}`,
257
+ },
258
+ ],
259
+ isError: true,
260
+ };
261
+ }
262
+ finally {
263
+ // Capture telemetry for tool calls
264
+ const duration = Date.now() / 1000 - startTime;
265
+ productTelemetry.capture(new MCPServerTelemetryEvent({
266
+ version: get_browser_use_version(),
267
+ action: 'tool_call',
268
+ tool_name: request.params.name,
269
+ duration_seconds: duration,
270
+ error_message: errorMsg,
271
+ }));
272
+ }
273
+ });
274
+ // List available prompts
275
+ this.server.setRequestHandler(ListPromptsRequestSchema, async () => {
276
+ return {
277
+ prompts: Array.from(this.prompts.values()).map((prompt) => ({
278
+ name: prompt.name,
279
+ description: prompt.description,
280
+ arguments: prompt.arguments,
281
+ })),
282
+ };
283
+ });
284
+ // Get prompt with arguments
285
+ this.server.setRequestHandler(GetPromptRequestSchema, async (request) => {
286
+ const prompt = this.prompts.get(request.params.name);
287
+ if (!prompt) {
288
+ throw new Error(`Prompt not found: ${request.params.name}`);
289
+ }
290
+ const args = request.params.arguments || {};
291
+ const message = prompt.template(args);
292
+ return {
293
+ messages: [
294
+ {
295
+ role: 'user',
296
+ content: {
297
+ type: 'text',
298
+ text: message,
299
+ },
300
+ },
301
+ ],
302
+ };
303
+ });
304
+ }
305
+ async ensureController() {
306
+ if (!this.controller) {
307
+ this.controller = new DefaultController();
308
+ this.registerControllerActions(this.controller);
309
+ }
310
+ return this.controller;
311
+ }
312
+ async ensureBrowserSession() {
313
+ if (!this.browserSession) {
314
+ const profileConfig = this.getDefaultProfileConfig();
315
+ const profile = this.buildDirectSessionProfile(profileConfig);
316
+ this.browserSession = new BrowserSession({ browser_profile: profile });
317
+ this.initializeLlmForDirectTools();
318
+ this.initializeFileSystem(profileConfig);
319
+ }
320
+ if (!this.browserSession.initialized) {
321
+ await this.browserSession.start();
322
+ }
323
+ return this.browserSession;
324
+ }
325
+ async executeControllerAction(actionName, args) {
326
+ const controller = await this.ensureController();
327
+ const browserSession = await this.ensureBrowserSession();
328
+ if (actionName === 'extract_structured_data' && !this.llm) {
329
+ throw new Error('LLM not initialized (set OPENAI_API_KEY)');
330
+ }
331
+ return await controller.registry.execute_action(actionName, args, {
332
+ browser_session: browserSession,
333
+ page_extraction_llm: this.llm,
334
+ file_system: this.fileSystem,
335
+ available_file_paths: Array.isArray(browserSession.downloaded_files)
336
+ ? [...browserSession.downloaded_files]
337
+ : null,
338
+ context: undefined,
339
+ });
340
+ }
341
+ registerCoreBrowserTools() {
342
+ this.registerTool('browser_navigate', 'Navigate to a URL in the browser', z.object({
343
+ url: z.string(),
344
+ new_tab: z.boolean().default(false),
345
+ }), async (args) => this.executeControllerAction('go_to_url', {
346
+ url: String(args?.url ?? ''),
347
+ new_tab: Boolean(args?.new_tab),
348
+ }));
349
+ this.registerTool('browser_click', 'Click an element on the page by index from browser_get_state', z.object({
350
+ index: z.number().int(),
351
+ new_tab: z.boolean().optional().default(false),
352
+ }), async (args) => {
353
+ const browserSession = await this.ensureBrowserSession();
354
+ const index = Number(args?.index);
355
+ const openInNewTab = Boolean(args?.new_tab);
356
+ if (!openInNewTab) {
357
+ return this.executeControllerAction('click_element_by_index', {
358
+ index,
359
+ });
360
+ }
361
+ const element = await browserSession.get_dom_element_by_index(index);
362
+ if (!element) {
363
+ throw new Error(`Element with index ${index} not found`);
364
+ }
365
+ const href = element?.attributes?.href;
366
+ if (typeof href === 'string' && href.trim()) {
367
+ const currentPage = await browserSession.get_current_page();
368
+ const currentUrl = typeof currentPage?.url === 'function' ? currentPage.url() : '';
369
+ let targetUrl = href.trim();
370
+ try {
371
+ if (currentUrl) {
372
+ targetUrl = new URL(targetUrl, currentUrl).toString();
373
+ }
374
+ }
375
+ catch {
376
+ // Keep the original href if URL resolution fails.
377
+ }
378
+ await browserSession.create_new_tab(targetUrl);
379
+ const tabIndex = typeof browserSession.active_tab_index === 'number'
380
+ ? browserSession.active_tab_index
381
+ : null;
382
+ if (tabIndex !== null) {
383
+ return `Clicked element ${index} and opened in new tab #${tabIndex}: ${targetUrl}`;
384
+ }
385
+ return `Clicked element ${index} and opened new tab: ${targetUrl}`;
386
+ }
387
+ const locator = typeof browserSession.get_locate_element === 'function'
388
+ ? await browserSession.get_locate_element(element)
389
+ : null;
390
+ if (locator && typeof locator.click === 'function') {
391
+ const modifier = process.platform === 'darwin' ? 'Meta' : 'Control';
392
+ await locator.click({ modifiers: [modifier] });
393
+ await new Promise((resolve) => setTimeout(resolve, 500));
394
+ return `Clicked element ${index} with ${modifier} key (new tab if supported)`;
395
+ }
396
+ // Fallback: if no href exists, perform a normal click.
397
+ return this.executeControllerAction('click_element_by_index', {
398
+ index,
399
+ });
400
+ });
401
+ this.registerTool('browser_type', 'Type text into an input field by index from browser_get_state', z.object({
402
+ index: z.number().int(),
403
+ text: z.string(),
404
+ }), async (args) => this.executeControllerAction('input_text', {
405
+ index: Number(args?.index),
406
+ text: String(args?.text ?? ''),
407
+ }));
408
+ this.registerTool('browser_get_state', 'Get the current state of the page including interactive elements', z
409
+ .object({
410
+ include_screenshot: z.boolean().default(false),
411
+ })
412
+ .default({ include_screenshot: false }), async (args) => {
413
+ const browserSession = await this.ensureBrowserSession();
414
+ const state = await browserSession.get_browser_state_with_recovery({
415
+ include_screenshot: Boolean(args?.include_screenshot),
416
+ cache_clickable_elements_hashes: true,
417
+ });
418
+ return {
419
+ url: state.url,
420
+ title: state.title,
421
+ tabs: state.tabs,
422
+ page_info: state.page_info,
423
+ pixels_above: state.pixels_above,
424
+ pixels_below: state.pixels_below,
425
+ browser_errors: state.browser_errors,
426
+ loading_status: state.loading_status,
427
+ screenshot: state.screenshot,
428
+ interactive_elements: state.element_tree.clickable_elements_to_string(),
429
+ interactive_count: Object.keys(state.selector_map ?? {}).length,
430
+ };
431
+ });
432
+ this.registerTool('browser_extract_content', 'Extract structured content from the current page', z.object({
433
+ query: z.string(),
434
+ extract_links: z.boolean().default(false),
435
+ }), async (args) => this.executeControllerAction('extract_structured_data', {
436
+ query: String(args?.query ?? ''),
437
+ extract_links: Boolean(args?.extract_links),
438
+ }));
439
+ this.registerTool('browser_scroll', 'Scroll the page up or down', z
440
+ .object({
441
+ direction: z.enum(['up', 'down']).default('down'),
442
+ })
443
+ .default({ direction: 'down' }), async (args) => this.executeControllerAction('scroll', {
444
+ down: (args?.direction ?? 'down') !== 'up',
445
+ num_pages: 1,
446
+ }));
447
+ this.registerTool('browser_go_back', 'Go back to the previous page', z.object({}).strict(), async () => this.executeControllerAction('go_back', {}));
448
+ this.registerTool('browser_list_tabs', 'List all open tabs', z.object({}).strict(), async () => {
449
+ const browserSession = await this.ensureBrowserSession();
450
+ return browserSession.get_tabs_info();
451
+ });
452
+ this.registerTool('browser_switch_tab', 'Switch to a tab by index', z.object({
453
+ tab_index: z.number().int(),
454
+ }), async (args) => this.executeControllerAction('switch_tab', {
455
+ page_id: Number(args?.tab_index),
456
+ }));
457
+ this.registerTool('browser_close_tab', 'Close a tab by index', z.object({
458
+ tab_index: z.number().int(),
459
+ }), async (args) => this.executeControllerAction('close_tab', {
460
+ page_id: Number(args?.tab_index),
461
+ }));
462
+ this.registerTool('retry_with_browser_use_agent', 'Retry a complex task with the browser-use autonomous agent', z.object({
463
+ task: z.string(),
464
+ max_steps: z.number().int().optional().default(100),
465
+ model: z.string().optional().default('gpt-4o'),
466
+ allowed_domains: z.array(z.string()).optional().default([]),
467
+ use_vision: z.boolean().optional().default(true),
468
+ }), async (args) => {
469
+ const task = String(args?.task ?? '').trim();
470
+ if (!task) {
471
+ throw new Error('task is required');
472
+ }
473
+ const model = String(args?.model ?? 'gpt-4o').trim();
474
+ const maxSteps = Number(args?.max_steps ?? 100);
475
+ const useVision = Boolean(args?.use_vision ?? true);
476
+ const allowedDomains = Array.isArray(args?.allowed_domains)
477
+ ? args.allowed_domains
478
+ .map((entry) => String(entry).trim())
479
+ .filter(Boolean)
480
+ : [];
481
+ const llmConfig = this.getDefaultLlmConfig();
482
+ const configuredApiKey = typeof llmConfig.api_key === 'string' ? llmConfig.api_key.trim() : '';
483
+ const envApiKey = typeof process.env.OPENAI_API_KEY === 'string'
484
+ ? process.env.OPENAI_API_KEY.trim()
485
+ : '';
486
+ const apiKey = configuredApiKey || envApiKey;
487
+ if (!apiKey) {
488
+ return 'Error: OPENAI_API_KEY not set in config or environment';
489
+ }
490
+ const configuredModel = typeof llmConfig.model === 'string' && llmConfig.model.trim()
491
+ ? llmConfig.model.trim()
492
+ : 'gpt-4o';
493
+ const llmModel = model || configuredModel;
494
+ const temperature = typeof llmConfig.temperature === 'number' ? llmConfig.temperature : 0.7;
495
+ const llm = new ChatOpenAI({
496
+ model: llmModel,
497
+ apiKey,
498
+ temperature,
499
+ });
500
+ const profileConfig = this.getDefaultProfileConfig();
501
+ const profile = this.buildRetryProfile(profileConfig, allowedDomains);
502
+ const retryBrowserSession = new BrowserSession({
503
+ browser_profile: profile,
504
+ });
505
+ const agent = new Agent({
506
+ task,
507
+ llm,
508
+ browser_session: retryBrowserSession,
509
+ use_vision: useVision,
510
+ });
511
+ try {
512
+ const history = await agent.run(maxSteps);
513
+ return this.formatRetryResult(history);
514
+ }
515
+ catch (error) {
516
+ const message = error instanceof Error ? error.message : String(error);
517
+ return `Agent task failed: ${message}`;
518
+ }
519
+ finally {
520
+ await agent.close();
521
+ }
522
+ });
523
+ }
524
+ /**
525
+ * Register default prompts for common browser automation tasks
526
+ */
527
+ registerDefaultPrompts() {
528
+ // Scrape data prompt
529
+ this.registerPrompt({
530
+ name: 'scrape_data',
531
+ description: 'Extract structured data from a website',
532
+ arguments: [
533
+ { name: 'url', description: 'URL to scrape', required: true },
534
+ {
535
+ name: 'data_type',
536
+ description: 'Type of data to extract',
537
+ required: true,
538
+ },
539
+ ],
540
+ template: (args) => `Use browser_navigate to go to ${args.url}, then use browser_extract_content to extract ${args.data_type}. If the page requires interaction, use browser_get_state to find elements and browser_click/browser_type as needed.`,
541
+ });
542
+ // Fill form prompt
543
+ this.registerPrompt({
544
+ name: 'fill_form',
545
+ description: 'Fill out and submit a web form',
546
+ arguments: [
547
+ { name: 'url', description: 'URL of the form', required: true },
548
+ {
549
+ name: 'field_data',
550
+ description: 'JSON object with field values',
551
+ required: true,
552
+ },
553
+ ],
554
+ template: (args) => `Navigate to ${args.url}, use browser_get_state to identify form fields, then use browser_type to fill in: ${args.field_data}. Finally, click the submit button.`,
555
+ });
556
+ // Multi-step task prompt
557
+ this.registerPrompt({
558
+ name: 'multi_step_task',
559
+ description: 'Execute a complex multi-step task',
560
+ arguments: [
561
+ {
562
+ name: 'task_description',
563
+ description: 'Detailed description of the task',
564
+ required: true,
565
+ },
566
+ {
567
+ name: 'max_steps',
568
+ description: 'Maximum number of steps (default: 100)',
569
+ required: false,
570
+ },
571
+ ],
572
+ template: (args) => `Use retry_with_browser_use_agent with task: '${args.task_description}'. Set max_steps=${args.max_steps || '100'} and use_vision=true for better understanding.`,
573
+ });
574
+ // Research topic prompt
575
+ this.registerPrompt({
576
+ name: 'research_topic',
577
+ description: 'Research a topic across multiple websites',
578
+ arguments: [
579
+ { name: 'topic', description: 'Topic to research', required: true },
580
+ {
581
+ name: 'sites',
582
+ description: 'Comma-separated list of websites',
583
+ required: true,
584
+ },
585
+ ],
586
+ template: (args) => `Open multiple tabs using browser_navigate with new_tab=true for sites: ${args.sites}. Use browser_extract_content on each to gather information about ${args.topic}. Switch between tabs with browser_switch_tab.`,
587
+ });
588
+ }
589
+ /**
590
+ * Register a tool with the MCP server
591
+ */
592
+ registerTool(name, description, inputSchema, handler) {
593
+ this.tools[name] = {
594
+ description,
595
+ inputSchema: inputSchema instanceof z.ZodType
596
+ ? zodToJsonSchema(inputSchema)
597
+ : inputSchema,
598
+ handler,
599
+ };
600
+ logger.debug(`Registered tool: ${name}`);
601
+ }
602
+ /**
603
+ * Register all Controller actions as MCP tools
604
+ */
605
+ async registerControllerActions(controller) {
606
+ this.controller = controller;
607
+ // Get all registered actions from the controller
608
+ const actions = controller.registry.get_all_actions();
609
+ for (const [actionName, actionInfo] of actions.entries()) {
610
+ // Create a wrapper for the action
611
+ const handler = async (args) => {
612
+ return this.executeControllerAction(actionName, args || {});
613
+ };
614
+ // Register the action as a tool
615
+ this.registerTool(actionName, actionInfo.description || `Execute ${actionName} action`, actionInfo.paramSchema ?? z.object({}).strict(), handler);
616
+ }
617
+ logger.info(`✅ Registered ${actions.size} controller actions as MCP tools`);
618
+ }
619
+ /**
620
+ * Initialize the browser session
621
+ */
622
+ async initBrowserSession(browserSession) {
623
+ this.browserSession = browserSession;
624
+ await this.browserSession.start();
625
+ logger.info('Browser session initialized');
626
+ }
627
+ /**
628
+ * Start the MCP server
629
+ */
630
+ async start() {
631
+ if (this.isRunning) {
632
+ logger.warning('MCP Server is already running');
633
+ return;
634
+ }
635
+ // Capture telemetry for server start
636
+ productTelemetry.capture(new MCPServerTelemetryEvent({
637
+ version: get_browser_use_version(),
638
+ action: 'start',
639
+ }));
640
+ try {
641
+ const transport = new StdioServerTransport();
642
+ await this.server.connect(transport);
643
+ this.isRunning = true;
644
+ logger.info(`🔌 MCP Server started (${this.getToolCount()} tools, ${this.getPromptCount()} prompts registered)`);
645
+ }
646
+ catch (error) {
647
+ this.isRunning = false;
648
+ logger.error(`Failed to start MCP server: ${error}`);
649
+ throw error;
650
+ }
651
+ }
652
+ /**
653
+ * Stop the MCP server and cleanup resources
654
+ */
655
+ async stop() {
656
+ if (!this.isRunning) {
657
+ logger.warning('MCP Server is not running');
658
+ return;
659
+ }
660
+ try {
661
+ this.isRunning = false;
662
+ // Cancel any pending operations
663
+ if (this.abortController) {
664
+ this.abortController.abort();
665
+ this.abortController = null;
666
+ }
667
+ // Close browser session if active
668
+ if (this.browserSession) {
669
+ await this.browserSession.stop();
670
+ this.browserSession = null;
671
+ logger.info('Browser session closed');
672
+ }
673
+ // Capture telemetry for server stop
674
+ const duration = Date.now() / 1000 - this.startTime;
675
+ productTelemetry.capture(new MCPServerTelemetryEvent({
676
+ version: get_browser_use_version(),
677
+ action: 'stop',
678
+ duration_seconds: duration,
679
+ }));
680
+ productTelemetry.flush();
681
+ const stats = this.getStats();
682
+ logger.info(`🔌 MCP Server stopped (uptime: ${Math.floor(stats.uptime)}s, executions: ${stats.executionCount}, success rate: ${(stats.successRate * 100).toFixed(1)}%)`);
683
+ }
684
+ catch (error) {
685
+ logger.error(`Error stopping MCP server: ${error}`);
686
+ }
687
+ }
688
+ /**
689
+ * Register a prompt template
690
+ */
691
+ registerPrompt(prompt) {
692
+ this.prompts.set(prompt.name, prompt);
693
+ logger.debug(`Registered prompt: ${prompt.name}`);
694
+ }
695
+ /**
696
+ * Get the number of registered tools
697
+ */
698
+ getToolCount() {
699
+ return Object.keys(this.tools).length;
700
+ }
701
+ /**
702
+ * Get the number of registered prompts
703
+ */
704
+ getPromptCount() {
705
+ return this.prompts.size;
706
+ }
707
+ /**
708
+ * Get server health status
709
+ */
710
+ getHealth() {
711
+ const uptime = Date.now() / 1000 - this.startTime;
712
+ const errorRate = this.toolExecutionCount > 0
713
+ ? this.errorCount / this.toolExecutionCount
714
+ : 0;
715
+ let status = 'healthy';
716
+ if (errorRate > 0.5) {
717
+ status = 'unhealthy';
718
+ }
719
+ else if (errorRate > 0.2) {
720
+ status = 'degraded';
721
+ }
722
+ return {
723
+ status,
724
+ uptime,
725
+ toolExecutionCount: this.toolExecutionCount,
726
+ errorCount: this.errorCount,
727
+ errorRate,
728
+ browserSessionActive: this.browserSession !== null,
729
+ };
730
+ }
731
+ /**
732
+ * Get server statistics
733
+ */
734
+ getStats() {
735
+ const health = this.getHealth();
736
+ return {
737
+ toolsRegistered: this.getToolCount(),
738
+ promptsRegistered: this.getPromptCount(),
739
+ uptime: health.uptime,
740
+ executionCount: this.toolExecutionCount,
741
+ errorCount: this.errorCount,
742
+ successRate: health.toolExecutionCount > 0 ? 1 - health.errorRate : 1,
743
+ };
744
+ }
745
+ /**
746
+ * Reset statistics
747
+ */
748
+ resetStats() {
749
+ this.toolExecutionCount = 0;
750
+ this.errorCount = 0;
751
+ logger.info('Statistics reset');
752
+ }
753
+ /**
754
+ * Check if server is running
755
+ */
756
+ isServerRunning() {
757
+ return this.isRunning;
758
+ }
759
+ }