browser-use 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61):
  1. package/README.md +22 -17
  2. package/dist/agent/service.js +13 -2
  3. package/dist/agent/system_prompt.md +269 -0
  4. package/dist/agent/system_prompt_anthropic_flash.md +240 -0
  5. package/dist/agent/system_prompt_browser_use.md +18 -0
  6. package/dist/agent/system_prompt_browser_use_flash.md +15 -0
  7. package/dist/agent/system_prompt_browser_use_no_thinking.md +17 -0
  8. package/dist/agent/system_prompt_flash.md +16 -0
  9. package/dist/agent/system_prompt_flash_anthropic.md +30 -0
  10. package/dist/agent/system_prompt_no_thinking.md +245 -0
  11. package/dist/browser/cloud/index.d.ts +1 -0
  12. package/dist/browser/cloud/index.js +1 -0
  13. package/dist/browser/cloud/management.d.ts +130 -0
  14. package/dist/browser/cloud/management.js +140 -0
  15. package/dist/browser/events.d.ts +61 -3
  16. package/dist/browser/events.js +66 -0
  17. package/dist/browser/profile.d.ts +1 -0
  18. package/dist/browser/profile.js +1 -0
  19. package/dist/browser/session.d.ts +56 -2
  20. package/dist/browser/session.js +596 -24
  21. package/dist/browser/watchdogs/base.js +34 -1
  22. package/dist/browser/watchdogs/captcha-watchdog.d.ts +26 -0
  23. package/dist/browser/watchdogs/captcha-watchdog.js +151 -0
  24. package/dist/browser/watchdogs/index.d.ts +1 -0
  25. package/dist/browser/watchdogs/index.js +1 -0
  26. package/dist/browser/watchdogs/screenshot-watchdog.js +4 -3
  27. package/dist/cli.d.ts +120 -0
  28. package/dist/cli.js +1817 -5
  29. package/dist/config.js +1 -1
  30. package/dist/controller/registry/views.d.ts +2 -0
  31. package/dist/controller/registry/views.js +44 -17
  32. package/dist/controller/service.js +106 -362
  33. package/dist/controller/views.d.ts +9 -6
  34. package/dist/controller/views.js +8 -5
  35. package/dist/filesystem/file-system.js +1 -1
  36. package/dist/llm/litellm/chat.d.ts +11 -0
  37. package/dist/llm/litellm/chat.js +16 -0
  38. package/dist/llm/litellm/index.d.ts +1 -0
  39. package/dist/llm/litellm/index.js +1 -0
  40. package/dist/llm/models.js +29 -3
  41. package/dist/llm/oci-raw/chat.d.ts +64 -0
  42. package/dist/llm/oci-raw/chat.js +350 -0
  43. package/dist/llm/oci-raw/index.d.ts +2 -0
  44. package/dist/llm/oci-raw/index.js +2 -0
  45. package/dist/llm/oci-raw/serializer.d.ts +12 -0
  46. package/dist/llm/oci-raw/serializer.js +128 -0
  47. package/dist/mcp/server.d.ts +1 -0
  48. package/dist/mcp/server.js +62 -13
  49. package/dist/observability.js +1 -1
  50. package/dist/skill-cli/direct.d.ts +100 -0
  51. package/dist/skill-cli/direct.js +984 -0
  52. package/dist/skill-cli/index.d.ts +2 -0
  53. package/dist/skill-cli/index.js +2 -0
  54. package/dist/skill-cli/server.d.ts +2 -0
  55. package/dist/skill-cli/server.js +472 -11
  56. package/dist/skill-cli/tunnel.d.ts +61 -0
  57. package/dist/skill-cli/tunnel.js +257 -0
  58. package/dist/sync/auth.d.ts +8 -0
  59. package/dist/sync/auth.js +12 -0
  60. package/dist/utils.js +1 -1
  61. package/package.json +31 -12
@@ -0,0 +1,128 @@
1
+ import { AssistantMessage, ContentPartImageParam, ContentPartRefusalParam, ContentPartTextParam, SystemMessage, UserMessage, } from '../messages.js';
2
/**
 * Wrap a string in a text content part.
 *
 * @param text - Value stored in the part's `text` field.
 * @returns An object tagged `type: 'TEXT'` that carries `text`.
 */
const textContent = (text) => {
    const part = { type: 'TEXT', text: text };
    return part;
};
6
/**
 * Wrap an image URL in an image content part.
 *
 * @param url - Value stored at `imageUrl.url`.
 * @returns An object tagged `type: 'IMAGE'` with a nested `imageUrl.url`.
 */
const imageContent = (url) => {
    const imageUrl = { url: url };
    return { type: 'IMAGE', imageUrl: imageUrl };
};
12
/**
 * Convert message content into a list of content parts.
 *
 * - A string becomes a single text part.
 * - Anything that is neither a string nor an array becomes an empty list.
 * - In an array, text parts map to text, image parts map to image parts built
 *   from `image_url.url`, and refusal parts map to text prefixed with
 *   `[Refusal] `. Entries of any other kind are dropped.
 *
 * @param content - String, array of content-part instances, or any other value.
 * @returns Array of converted parts, in input order.
 */
const contentPartsToOci = (content) => {
    if (typeof content === 'string') {
        return [textContent(content)];
    }
    if (!Array.isArray(content)) {
        return [];
    }
    // Each entry maps to zero or one converted part; flatMap drops the empties.
    return content.flatMap((item) => {
        if (item instanceof ContentPartTextParam) {
            return [textContent(item.text)];
        }
        if (item instanceof ContentPartImageParam) {
            return [imageContent(item.image_url.url)];
        }
        if (item instanceof ContentPartRefusalParam) {
            return [textContent(`[Refusal] ${item.refusal}`)];
        }
        return [];
    });
};
35
/**
 * Pick the role label for a message.
 *
 * @param message - Message instance to classify.
 * @returns `'SYSTEM'` for a SystemMessage, `'ASSISTANT'` for an
 *          AssistantMessage, and `'USER'` for everything else.
 */
const serializeRole = (message) => message instanceof SystemMessage
    ? 'SYSTEM'
    : message instanceof AssistantMessage
        ? 'ASSISTANT'
        : 'USER';
44
/**
 * Read the optional `name` of a user or system message.
 *
 * @param message - Message instance to inspect.
 * @returns `message.name` for a UserMessage or SystemMessage, with `null`
 *          normalised to `undefined`; `undefined` for any other message.
 */
const serializeName = (message) => {
    const carriesName = message instanceof UserMessage || message instanceof SystemMessage;
    if (!carriesName) {
        return undefined;
    }
    const { name } = message;
    return name == null ? undefined : name;
};
50
/**
 * Static helpers that turn chat messages into the two payload shapes built by
 * this module: a structured message list and a single flattened transcript.
 */
export class OCIRawMessageSerializer {
    /**
     * Build one `{ role, name, content }` entry per message.
     *
     * Content is converted with `contentPartsToOci`. An assistant message that
     * yields no content parts but has a truthy top-level `refusal` is emitted
     * as a single `[Refusal] ...` text part, matching how refusals are handled
     * in `serializeMessagesForCohere`. Messages that still have no content
     * parts are skipped.
     *
     * @param messages - Iterable of message instances.
     * @returns Serialized entries in input order.
     */
    static serializeMessages(messages) {
        const serialized = [];
        for (const message of messages) {
            // Every message type goes through the same content converter.
            let content = contentPartsToOci(message.content);
            if (content.length === 0 &&
                message instanceof AssistantMessage &&
                message.refusal) {
                // Keep refusal-only assistant turns instead of silently dropping them.
                content = [textContent(`[Refusal] ${message.refusal}`)];
            }
            if (content.length === 0) {
                continue;
            }
            serialized.push({
                role: serializeRole(message),
                name: serializeName(message),
                content,
            });
        }
        return serialized;
    }
    /**
     * Flatten messages into one transcript string.
     *
     * Each message with non-empty text becomes `"<Prefix>: <text>"`, where the
     * prefix is `System`, `Assistant` or `User`; entries are joined with a
     * blank line. Images in user/system content are replaced by the
     * placeholders `[Image: base64_data]` (for `data:image/` URLs) or
     * `[Image: external_url]`. Refusals are rendered as `[Refusal] ...`.
     *
     * @param messages - Iterable of message instances.
     * @returns The joined transcript; an empty string when nothing has text.
     */
    static serializeMessagesForCohere(messages) {
        const conversationParts = [];
        for (const message of messages) {
            let text = '';
            if (message instanceof UserMessage || message instanceof SystemMessage) {
                const content = message.content;
                if (typeof content === 'string') {
                    text = content;
                }
                else if (Array.isArray(content)) {
                    // Guarded so null/undefined content is skipped rather than
                    // throwing on `.map`.
                    text = content
                        .map((part) => {
                        if (part instanceof ContentPartTextParam) {
                            return part.text;
                        }
                        if (part instanceof ContentPartImageParam) {
                            return part.image_url.url.startsWith('data:image/')
                                ? '[Image: base64_data]'
                                : '[Image: external_url]';
                        }
                        return '';
                    })
                        .filter(Boolean)
                        .join(' ');
                }
            }
            else if (message instanceof AssistantMessage) {
                if (typeof message.content === 'string') {
                    text = message.content;
                }
                else if (Array.isArray(message.content)) {
                    text = message.content
                        .map((part) => {
                        if (part instanceof ContentPartTextParam) {
                            return part.text;
                        }
                        if (part instanceof ContentPartRefusalParam) {
                            return `[Refusal] ${part.refusal}`;
                        }
                        return '';
                    })
                        .filter(Boolean)
                        .join(' ');
                }
                // Fall back to the top-level refusal whenever the content
                // produced no text, not only when content is absent.
                if (!text && message.refusal) {
                    text = `[Refusal] ${message.refusal}`;
                }
            }
            if (!text) {
                continue;
            }
            const prefix = message instanceof SystemMessage
                ? 'System'
                : message instanceof AssistantMessage
                    ? 'Assistant'
                    : 'User';
            conversationParts.push(`${prefix}: ${text}`);
        }
        return conversationParts.join('\n\n');
    }
}
@@ -75,6 +75,7 @@ export declare class MCPServer {
75
75
  private cleanupExpiredSessions;
76
76
  private startSessionCleanupLoop;
77
77
  private stopSessionCleanupLoop;
78
+ private formatToolResult;
78
79
  private setupHandlers;
79
80
  private ensureController;
80
81
  private ensureBrowserSession;
@@ -363,6 +363,64 @@ export class MCPServer {
363
363
  clearInterval(this.sessionCleanupInterval);
364
364
  this.sessionCleanupInterval = null;
365
365
  }
366
+ formatToolResult(toolName, result) {
367
+ if (toolName === 'browser_get_state' &&
368
+ result &&
369
+ typeof result === 'object' &&
370
+ !Array.isArray(result)) {
371
+ const payload = {
372
+ ...result,
373
+ };
374
+ const screenshot = typeof payload.screenshot === 'string' && payload.screenshot.trim()
375
+ ? payload.screenshot
376
+ : null;
377
+ delete payload.screenshot;
378
+ const pageInfo = payload.page_info &&
379
+ typeof payload.page_info === 'object' &&
380
+ !Array.isArray(payload.page_info)
381
+ ? payload.page_info
382
+ : null;
383
+ const viewportWidth = typeof pageInfo?.viewport_width === 'number'
384
+ ? pageInfo.viewport_width
385
+ : null;
386
+ const viewportHeight = typeof pageInfo?.viewport_height === 'number'
387
+ ? pageInfo.viewport_height
388
+ : null;
389
+ if (screenshot &&
390
+ viewportWidth !== null &&
391
+ viewportHeight !== null &&
392
+ payload.screenshot_dimensions == null) {
393
+ payload.screenshot_dimensions = {
394
+ width: viewportWidth,
395
+ height: viewportHeight,
396
+ };
397
+ }
398
+ const content = [
399
+ {
400
+ type: 'text',
401
+ text: JSON.stringify(payload, null, 2),
402
+ },
403
+ ];
404
+ if (screenshot) {
405
+ content.push({
406
+ type: 'image',
407
+ data: screenshot,
408
+ mimeType: 'image/png',
409
+ });
410
+ }
411
+ return { content };
412
+ }
413
+ return {
414
+ content: [
415
+ {
416
+ type: 'text',
417
+ text: typeof result === 'string'
418
+ ? result
419
+ : JSON.stringify(result, null, 2),
420
+ },
421
+ ],
422
+ };
423
+ }
366
424
  setupHandlers() {
367
425
  // List available tools
368
426
  this.server.setRequestHandler(ListToolsRequestSchema, async () => {
@@ -386,16 +444,7 @@ export class MCPServer {
386
444
  logger.debug(`Executing tool: ${request.params.name}`);
387
445
  this.toolExecutionCount++;
388
446
  const result = await tool.handler(request.params.arguments || {});
389
- return {
390
- content: [
391
- {
392
- type: 'text',
393
- text: typeof result === 'string'
394
- ? result
395
- : JSON.stringify(result, null, 2),
396
- },
397
- ],
398
- };
447
+ return this.formatToolResult(request.params.name, result);
399
448
  }
400
449
  catch (error) {
401
450
  this.errorCount++;
@@ -676,7 +725,7 @@ export class MCPServer {
676
725
  this.registerTool('retry_with_browser_use_agent', 'Retry a complex task with the browser-use autonomous agent', z.object({
677
726
  task: z.string(),
678
727
  max_steps: z.number().int().optional().default(100),
679
- model: z.string().optional().default('gpt-4o'),
728
+ model: z.string().optional(),
680
729
  allowed_domains: z.array(z.string()).optional().default([]),
681
730
  use_vision: z.boolean().optional().default(true),
682
731
  }), async (args) => {
@@ -684,7 +733,7 @@ export class MCPServer {
684
733
  if (!task) {
685
734
  throw new Error('task is required');
686
735
  }
687
- const model = String(args?.model ?? 'gpt-4o').trim();
736
+ const requestedModel = typeof args?.model === 'string' ? args.model.trim() : '';
688
737
  const maxSteps = Number(args?.max_steps ?? 100);
689
738
  const useVision = Boolean(args?.use_vision ?? true);
690
739
  const allowedDomains = Array.isArray(args?.allowed_domains)
@@ -696,7 +745,7 @@ export class MCPServer {
696
745
  const configuredModel = typeof llmConfig.model === 'string' && llmConfig.model.trim()
697
746
  ? llmConfig.model.trim()
698
747
  : 'gpt-4o';
699
- const llmModel = model || configuredModel;
748
+ const llmModel = requestedModel || configuredModel;
700
749
  let llm;
701
750
  try {
702
751
  llm = this.createLlmFromModelName(llmModel, llmConfig);
@@ -1,7 +1,7 @@
1
1
  import { createRequire } from 'node:module';
2
2
  import { config as loadEnv } from 'dotenv';
3
3
  import { createLogger } from './logging-config.js';
4
- loadEnv();
4
+ loadEnv({ quiet: true });
5
5
  const require = createRequire(import.meta.url);
6
6
  const logger = createLogger('browser_use.observability');
7
7
  let lmnrObserve = null;
@@ -0,0 +1,100 @@
1
+ #!/usr/bin/env node
2
+ import { CloudBrowserClient } from '../browser/cloud/cloud.js';
3
/**
 * State record exchanged with `load_direct_state` / `save_direct_state`.
 *
 * Every field is optional; all fields except `mode` also accept `null`.
 * NOTE(review): the meaning of each field is implied by its name only —
 * confirm against the implementation in direct.js.
 */
export interface DirectModeState {
    mode?: 'local' | 'remote';
    cdp_url?: null | string;
    session_id?: null | string;
    browser_pid?: null | number;
    user_data_dir?: null | string;
    owns_user_data_dir?: null | boolean;
    active_url?: null | string;
}
12
/**
 * Exported string constant.
 * NOTE(review): presumably the default path used by the `*_direct_state`
 * helpers when `state_file` is omitted — confirm in direct.js.
 */
export declare const DIRECT_STATE_FILE: string;
13
/**
 * Minimal writable sink: anything exposing `write(chunk)` for string chunks.
 * Used as the type of `stdout` / `stderr` in `DirectCliEnvironment`.
 */
interface StreamLike {
    /** Accepts one string chunk; the return value is ignored. */
    write(chunk: string): void;
}
16
/**
 * Structural type for the session object returned by
 * `DirectCliEnvironment.session_factory`.
 *
 * Only `start` is required; every other member is optional.
 * NOTE(review): member semantics are implied by names and signatures only —
 * confirm against the concrete session class.
 */
interface DirectSessionLike {
    // --- Plain data members -------------------------------------------------
    tabs?: {
        target_id?: null | string;
        url?: null | string;
    }[];
    active_tab?: null | {
        target_id?: null | string;
        url?: null | string;
    };
    event_bus?: null | {
        stop?: () => Promise<void> | void;
    };
    browser_context?: null | {
        cookies?: (urls?: string[]) => Promise<any[]>;
        addCookies?: (cookies: any[]) => Promise<unknown>;
        clearCookies?: () => Promise<unknown>;
    };

    // --- Lifecycle ----------------------------------------------------------
    detach_all_watchdogs?: () => void;
    /** The only required member. */
    start: () => Promise<unknown>;

    // --- Page access and state ----------------------------------------------
    navigate_to?: (url: string) => Promise<unknown>;
    get_current_page?: () => Promise<any>;
    get_browser_state_with_recovery?: (options?: { include_screenshot?: boolean }) => Promise<{
        llm_representation: () => string;
        url?: string;
        title?: string;
        tabs?: unknown[];
    }>;
    get_page_info?: () => Promise<any>;

    // --- Element lookup and interaction -------------------------------------
    get_dom_element_by_index?: (index: number) => Promise<any>;
    get_locate_element?: (node: any) => Promise<any>;
    _click_element_node?: (node: any) => Promise<unknown>;
    click_coordinates?: (
        x: number,
        y: number,
        options?: { button?: 'left' | 'middle' | 'right' },
    ) => Promise<unknown>;
    send_keys?: (text: string) => Promise<unknown>;
    _input_text_element_node?: (
        node: any,
        text: string,
        options?: { clear?: boolean },
    ) => Promise<unknown>;

    // --- Capture, scrolling and history -------------------------------------
    take_screenshot?: (full_page?: boolean) => Promise<string | null>;
    scroll?: (direction: 'up' | 'down' | 'left' | 'right', amount: number) => Promise<unknown>;
    go_back?: () => Promise<unknown>;
    go_forward?: () => Promise<unknown>;
    get_page_html?: () => Promise<string>;
    execute_javascript?: (script: string) => Promise<unknown>;

    // --- Tabs, forms, waiting, cookies --------------------------------------
    switch_to_tab?: (identifier: number | string) => Promise<unknown>;
    close_tab?: (identifier: number | string) => Promise<unknown>;
    select_dropdown_option?: (node: any, value: string) => Promise<unknown>;
    wait_for_element?: (selector: string, timeout: number) => Promise<unknown>;
    get_cookies?: () => Promise<any[]>;
}
68
/**
 * Optional overrides accepted by `run_direct_command`.
 *
 * Every member is optional.
 * NOTE(review): these look like injection points for tests (custom streams,
 * factories and process control) — confirm the defaults used when a member
 * is omitted in direct.js.
 */
export interface DirectCliEnvironment {
    state_file?: string;
    stdout?: StreamLike;
    stderr?: StreamLike;
    /** Builds the session object from an optional (or null) `cdp_url`. */
    session_factory?: (init: { cdp_url?: null | string }) => DirectSessionLike;
    /** Supplies an object exposing `create_browser` and `stop_browser`. */
    cloud_client_factory?: () => Pick<CloudBrowserClient, 'create_browser' | 'stop_browser'>;
    /** Receives the current state and resolves with at least a `cdp_url`. */
    local_launcher?: (options: { state: DirectModeState }) => Promise<{
        cdp_url: string;
        browser_pid?: null | number;
        user_data_dir?: null | string;
        owns_user_data_dir?: null | boolean;
    }>;
    /** May complete synchronously or return a promise. */
    kill_process?: (pid: number) => void | Promise<void>;
}
86
/**
 * Returns a `DirectModeState` for the given (optional) state-file path.
 * NOTE(review): presumably falls back to `DIRECT_STATE_FILE` when
 * `state_file` is omitted — confirm in direct.js.
 */
export declare const load_direct_state: (state_file?: string) => DirectModeState;

/** Takes a state record and an optional state-file path; returns nothing. */
export declare const save_direct_state: (state: DirectModeState, state_file?: string) => void;

/** Takes an optional state-file path; returns nothing. */
export declare const clear_direct_state: (state_file?: string) => void;
89
/**
 * Launcher with the same `options.state` input as
 * `DirectCliEnvironment.local_launcher`, plus an optional `timeout_ms`.
 *
 * Resolves with a `cdp_url`, a `browser_pid` (number or null), a
 * `user_data_dir` (string, null or undefined) and a boolean
 * `owns_user_data_dir`.
 * NOTE(review): presumably the launcher used when `local_launcher` is not
 * supplied — confirm in direct.js.
 */
export declare const defaultLocalLauncher: (options: { state: DirectModeState; timeout_ms?: number }) => Promise<{
    cdp_url: string;
    browser_pid: null | number;
    user_data_dir: null | string | undefined;
    owns_user_data_dir: boolean;
}>;
98
/**
 * Runs one command from an argument vector, with optional environment
 * overrides, and resolves with `0` or `1`.
 * NOTE(review): the result is presumably a process exit code — confirm.
 */
export declare const run_direct_command: (argv: string[], options?: DirectCliEnvironment) => Promise<0 | 1>;

/**
 * Entry point taking an optional argument vector; resolves with `0` or `1`.
 * NOTE(review): the default used for `argv` when omitted is not visible
 * here — confirm in direct.js.
 */
export declare const main: (argv?: string[]) => Promise<0 | 1>;
100
+ export {};