browser-use 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/README.md +295 -686
  2. package/dist/actor/element.d.ts +19 -0
  3. package/dist/actor/element.js +46 -0
  4. package/dist/actor/index.d.ts +4 -0
  5. package/dist/actor/index.js +4 -0
  6. package/dist/actor/mouse.d.ts +19 -0
  7. package/dist/actor/mouse.js +39 -0
  8. package/dist/actor/page.d.ts +29 -0
  9. package/dist/actor/page.js +88 -0
  10. package/dist/actor/utils.d.ts +4 -0
  11. package/dist/actor/utils.js +35 -0
  12. package/dist/agent/cloud-events.d.ts +18 -0
  13. package/dist/agent/cloud-events.js +65 -2
  14. package/dist/agent/gif.d.ts +1 -0
  15. package/dist/agent/gif.js +24 -2
  16. package/dist/agent/judge.d.ts +17 -0
  17. package/dist/agent/judge.js +197 -0
  18. package/dist/agent/message-manager/service.d.ts +12 -4
  19. package/dist/agent/message-manager/service.js +205 -39
  20. package/dist/agent/message-manager/utils.js +0 -1
  21. package/dist/agent/message-manager/views.d.ts +4 -0
  22. package/dist/agent/message-manager/views.js +11 -7
  23. package/dist/agent/prompts.d.ts +24 -3
  24. package/dist/agent/prompts.js +274 -59
  25. package/dist/agent/service.d.ts +99 -41
  26. package/dist/agent/service.js +2266 -472
  27. package/dist/agent/variable-detector.d.ts +12 -0
  28. package/dist/agent/variable-detector.js +211 -0
  29. package/dist/agent/views.d.ts +237 -18
  30. package/dist/agent/views.js +446 -33
  31. package/dist/browser/cloud/cloud.d.ts +20 -0
  32. package/dist/browser/cloud/cloud.js +129 -0
  33. package/dist/browser/cloud/index.d.ts +2 -0
  34. package/dist/browser/cloud/index.js +2 -0
  35. package/dist/browser/cloud/views.d.ts +41 -0
  36. package/dist/browser/cloud/views.js +35 -0
  37. package/dist/browser/events.d.ts +345 -0
  38. package/dist/browser/events.js +566 -0
  39. package/dist/browser/extensions.js +17 -17
  40. package/dist/browser/index.d.ts +4 -0
  41. package/dist/browser/index.js +4 -0
  42. package/dist/browser/profile.d.ts +8 -2
  43. package/dist/browser/profile.js +79 -12
  44. package/dist/browser/session-manager.d.ts +85 -0
  45. package/dist/browser/session-manager.js +208 -0
  46. package/dist/browser/session.d.ts +100 -8
  47. package/dist/browser/session.js +1097 -58
  48. package/dist/browser/types.d.ts +0 -2
  49. package/dist/browser/views.d.ts +39 -0
  50. package/dist/browser/views.js +32 -0
  51. package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
  52. package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
  53. package/dist/browser/watchdogs/base.d.ts +21 -0
  54. package/dist/browser/watchdogs/base.js +81 -0
  55. package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
  56. package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
  57. package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
  58. package/dist/browser/watchdogs/crash-watchdog.js +296 -0
  59. package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
  60. package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
  61. package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
  62. package/dist/browser/watchdogs/dom-watchdog.js +31 -0
  63. package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
  64. package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
  65. package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
  66. package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
  67. package/dist/browser/watchdogs/index.d.ts +15 -0
  68. package/dist/browser/watchdogs/index.js +15 -0
  69. package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
  70. package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
  71. package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
  72. package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
  73. package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
  74. package/dist/browser/watchdogs/popups-watchdog.js +77 -0
  75. package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
  76. package/dist/browser/watchdogs/recording-watchdog.js +249 -0
  77. package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
  78. package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
  79. package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
  80. package/dist/browser/watchdogs/security-watchdog.js +84 -0
  81. package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
  82. package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
  83. package/dist/cli.d.ts +7 -2
  84. package/dist/cli.js +182 -25
  85. package/dist/code-use/formatting.d.ts +3 -0
  86. package/dist/code-use/formatting.js +18 -0
  87. package/dist/code-use/index.d.ts +6 -0
  88. package/dist/code-use/index.js +6 -0
  89. package/dist/code-use/namespace.d.ts +5 -0
  90. package/dist/code-use/namespace.js +81 -0
  91. package/dist/code-use/notebook-export.d.ts +3 -0
  92. package/dist/code-use/notebook-export.js +56 -0
  93. package/dist/code-use/service.d.ts +24 -0
  94. package/dist/code-use/service.js +104 -0
  95. package/dist/code-use/utils.d.ts +4 -0
  96. package/dist/code-use/utils.js +98 -0
  97. package/dist/code-use/views.d.ts +108 -0
  98. package/dist/code-use/views.js +165 -0
  99. package/dist/config.d.ts +13 -0
  100. package/dist/config.js +69 -3
  101. package/dist/controller/registry/service.d.ts +10 -1
  102. package/dist/controller/registry/service.js +266 -10
  103. package/dist/controller/registry/views.d.ts +4 -1
  104. package/dist/controller/registry/views.js +25 -2
  105. package/dist/controller/service.d.ts +10 -1
  106. package/dist/controller/service.js +1807 -268
  107. package/dist/controller/views.d.ts +78 -155
  108. package/dist/controller/views.js +61 -12
  109. package/dist/dom/history-tree-processor/service.d.ts +5 -0
  110. package/dist/dom/history-tree-processor/service.js +169 -14
  111. package/dist/dom/history-tree-processor/view.d.ts +7 -1
  112. package/dist/dom/history-tree-processor/view.js +10 -1
  113. package/dist/dom/markdown-extractor.d.ts +37 -0
  114. package/dist/dom/markdown-extractor.js +345 -0
  115. package/dist/dom/service.d.ts +3 -1
  116. package/dist/dom/service.js +76 -0
  117. package/dist/dom/views.d.ts +1 -0
  118. package/dist/dom/views.js +45 -0
  119. package/dist/event-bus.d.ts +107 -7
  120. package/dist/event-bus.js +313 -10
  121. package/dist/exceptions.d.ts +0 -3
  122. package/dist/exceptions.js +0 -7
  123. package/dist/filesystem/file-system.d.ts +18 -0
  124. package/dist/filesystem/file-system.js +503 -42
  125. package/dist/index.d.ts +7 -0
  126. package/dist/index.js +6 -0
  127. package/dist/integrations/gmail/actions.d.ts +3 -3
  128. package/dist/integrations/gmail/actions.js +4 -4
  129. package/dist/llm/anthropic/chat.d.ts +18 -1
  130. package/dist/llm/anthropic/chat.js +123 -55
  131. package/dist/llm/anthropic/serializer.d.ts +2 -0
  132. package/dist/llm/anthropic/serializer.js +81 -9
  133. package/dist/llm/aws/chat-anthropic.d.ts +17 -0
  134. package/dist/llm/aws/chat-anthropic.js +126 -26
  135. package/dist/llm/aws/chat-bedrock.d.ts +28 -1
  136. package/dist/llm/aws/chat-bedrock.js +161 -34
  137. package/dist/llm/aws/serializer.d.ts +13 -1
  138. package/dist/llm/aws/serializer.js +56 -17
  139. package/dist/llm/azure/chat.d.ts +53 -2
  140. package/dist/llm/azure/chat.js +366 -54
  141. package/dist/llm/base.d.ts +2 -0
  142. package/dist/llm/browser-use/chat.d.ts +40 -0
  143. package/dist/llm/browser-use/chat.js +305 -0
  144. package/dist/llm/browser-use/index.d.ts +1 -0
  145. package/dist/llm/browser-use/index.js +1 -0
  146. package/dist/llm/cerebras/chat.d.ts +39 -0
  147. package/dist/llm/cerebras/chat.js +178 -0
  148. package/dist/llm/cerebras/index.d.ts +2 -0
  149. package/dist/llm/cerebras/index.js +2 -0
  150. package/dist/llm/cerebras/serializer.d.ts +7 -0
  151. package/dist/llm/cerebras/serializer.js +82 -0
  152. package/dist/llm/deepseek/chat.d.ts +19 -2
  153. package/dist/llm/deepseek/chat.js +138 -25
  154. package/dist/llm/google/chat.d.ts +46 -2
  155. package/dist/llm/google/chat.js +267 -64
  156. package/dist/llm/google/serializer.d.ts +9 -1
  157. package/dist/llm/google/serializer.js +141 -34
  158. package/dist/llm/groq/chat.d.ts +21 -2
  159. package/dist/llm/groq/chat.js +125 -26
  160. package/dist/llm/groq/parser.js +3 -1
  161. package/dist/llm/mistral/chat.d.ts +43 -0
  162. package/dist/llm/mistral/chat.js +154 -0
  163. package/dist/llm/mistral/index.d.ts +2 -0
  164. package/dist/llm/mistral/index.js +2 -0
  165. package/dist/llm/mistral/schema.d.ts +8 -0
  166. package/dist/llm/mistral/schema.js +27 -0
  167. package/dist/llm/models.d.ts +2 -0
  168. package/dist/llm/models.js +317 -0
  169. package/dist/llm/ollama/chat.d.ts +13 -1
  170. package/dist/llm/ollama/chat.js +110 -19
  171. package/dist/llm/ollama/serializer.d.ts +1 -0
  172. package/dist/llm/ollama/serializer.js +34 -12
  173. package/dist/llm/openai/chat.d.ts +16 -0
  174. package/dist/llm/openai/chat.js +94 -44
  175. package/dist/llm/openai/like.d.ts +5 -3
  176. package/dist/llm/openai/like.js +7 -3
  177. package/dist/llm/openai/responses-serializer.d.ts +18 -0
  178. package/dist/llm/openai/responses-serializer.js +72 -0
  179. package/dist/llm/openrouter/chat.d.ts +28 -2
  180. package/dist/llm/openrouter/chat.js +115 -29
  181. package/dist/llm/schema.d.ts +11 -1
  182. package/dist/llm/schema.js +81 -1
  183. package/dist/llm/vercel/chat.d.ts +50 -0
  184. package/dist/llm/vercel/chat.js +276 -0
  185. package/dist/llm/vercel/index.d.ts +1 -0
  186. package/dist/llm/vercel/index.js +1 -0
  187. package/dist/llm/vercel/serializer.d.ts +5 -0
  188. package/dist/llm/vercel/serializer.js +7 -0
  189. package/dist/llm/views.d.ts +2 -1
  190. package/dist/llm/views.js +3 -1
  191. package/dist/logging-config.d.ts +2 -0
  192. package/dist/logging-config.js +82 -29
  193. package/dist/mcp/client.d.ts +10 -5
  194. package/dist/mcp/client.js +14 -9
  195. package/dist/mcp/controller.d.ts +42 -3
  196. package/dist/mcp/controller.js +56 -31
  197. package/dist/mcp/server.d.ts +14 -0
  198. package/dist/mcp/server.js +255 -52
  199. package/dist/observability.js +10 -4
  200. package/dist/sandbox/index.d.ts +2 -0
  201. package/dist/sandbox/index.js +2 -0
  202. package/dist/sandbox/sandbox.d.ts +19 -0
  203. package/dist/sandbox/sandbox.js +140 -0
  204. package/dist/sandbox/views.d.ts +67 -0
  205. package/dist/sandbox/views.js +121 -0
  206. package/dist/skill-cli/index.d.ts +3 -0
  207. package/dist/skill-cli/index.js +3 -0
  208. package/dist/skill-cli/protocol.d.ts +30 -0
  209. package/dist/skill-cli/protocol.js +48 -0
  210. package/dist/skill-cli/server.d.ts +11 -0
  211. package/dist/skill-cli/server.js +85 -0
  212. package/dist/skill-cli/sessions.d.ts +24 -0
  213. package/dist/skill-cli/sessions.js +47 -0
  214. package/dist/skills/index.d.ts +3 -0
  215. package/dist/skills/index.js +3 -0
  216. package/dist/skills/service.d.ts +27 -0
  217. package/dist/skills/service.js +266 -0
  218. package/dist/skills/utils.d.ts +6 -0
  219. package/dist/skills/utils.js +53 -0
  220. package/dist/skills/views.d.ts +40 -0
  221. package/dist/skills/views.js +10 -0
  222. package/dist/sync/auth.js +8 -3
  223. package/dist/sync/service.d.ts +6 -6
  224. package/dist/sync/service.js +54 -89
  225. package/dist/telemetry/views.d.ts +20 -6
  226. package/dist/telemetry/views.js +23 -5
  227. package/dist/tokens/custom-pricing.d.ts +2 -0
  228. package/dist/tokens/custom-pricing.js +22 -0
  229. package/dist/tokens/index.d.ts +2 -0
  230. package/dist/tokens/index.js +2 -0
  231. package/dist/tokens/mappings.d.ts +1 -0
  232. package/dist/tokens/mappings.js +3 -0
  233. package/dist/tokens/service.js +27 -8
  234. package/dist/tools/extraction/index.d.ts +2 -0
  235. package/dist/tools/extraction/index.js +2 -0
  236. package/dist/tools/extraction/schema-utils.d.ts +6 -0
  237. package/dist/tools/extraction/schema-utils.js +237 -0
  238. package/dist/tools/extraction/views.d.ts +7 -0
  239. package/dist/tools/index.d.ts +5 -0
  240. package/dist/tools/index.js +5 -0
  241. package/dist/tools/registry/index.d.ts +2 -0
  242. package/dist/tools/registry/index.js +2 -0
  243. package/dist/tools/registry/service.d.ts +1 -0
  244. package/dist/tools/registry/service.js +1 -0
  245. package/dist/tools/registry/views.d.ts +1 -0
  246. package/dist/tools/registry/views.js +1 -0
  247. package/dist/tools/service.d.ts +2 -0
  248. package/dist/tools/service.js +1 -0
  249. package/dist/tools/utils.d.ts +2 -0
  250. package/dist/tools/utils.js +57 -0
  251. package/dist/tools/views.d.ts +1 -0
  252. package/dist/tools/views.js +1 -0
  253. package/dist/utils.d.ts +10 -1
  254. package/dist/utils.js +70 -3
  255. package/package.json +87 -26
  256. package/dist/dom/playground/process-dom.js +0 -5
  257. package/dist/dom/playground/test-accessibility.d.ts +0 -44
  258. package/dist/dom/playground/test-accessibility.js +0 -111
  259. /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
@@ -0,0 +1,19 @@
1
+ import type { BrowserSession } from '../browser/session.js';
2
+ import type { DOMElementNode } from '../dom/views.js';
3
/**
 * Declaration of the actor-layer handle for a single DOM node.
 *
 * An instance pairs a BrowserSession with the DOMElementNode it acts on.
 */
export declare class Element {
    private readonly browser_session;
    /** The DOM node this handle wraps. */
    readonly node: DOMElementNode;
    constructor(browser_session: BrowserSession, node: DOMElementNode);
    click(): Promise<string | null>;
    /** `clear` is optional; the compiled implementation defaults it to `true`. */
    fill(value: string, clear?: boolean): Promise<void>;
    hover(): Promise<void>;
    /**
     * Resolves to the attribute value, or `null` when the node has no such
     * attribute (the compiled implementation returns `attributes?.[name] ?? null`).
     */
    get_attribute(name: string): Promise<string | null>;
    /** Resolves to `null` when no bounding box can be obtained for the node. */
    get_bounding_box(): Promise<{
        x: number;
        y: number;
        width: number;
        height: number;
    } | null>;
    /** Accepts a single value or a list of values. */
    select_option(values: string | string[]): Promise<void>;
    evaluate(page_function: string, ...args: unknown[]): Promise<any>;
}
@@ -0,0 +1,46 @@
1
+ export class Element {
2
+ browser_session;
3
+ node;
4
+ constructor(browser_session, node) {
5
+ this.browser_session = browser_session;
6
+ this.node = node;
7
+ }
8
+ async click() {
9
+ return this.browser_session._click_element_node(this.node);
10
+ }
11
+ async fill(value, clear = true) {
12
+ return this.browser_session._input_text_element_node(this.node, value, {
13
+ clear,
14
+ });
15
+ }
16
+ async hover() {
17
+ const locator = await this.browser_session.get_locate_element(this.node);
18
+ if (!locator?.hover) {
19
+ return;
20
+ }
21
+ await locator.hover({ timeout: 5000 });
22
+ }
23
+ async get_attribute(name) {
24
+ return this.node.attributes?.[name] ?? null;
25
+ }
26
+ async get_bounding_box() {
27
+ const locator = await this.browser_session.get_locate_element(this.node);
28
+ if (!locator?.boundingBox) {
29
+ return null;
30
+ }
31
+ return locator.boundingBox();
32
+ }
33
+ async select_option(values) {
34
+ const list = Array.isArray(values) ? values : [values];
35
+ for (const value of list) {
36
+ await this.browser_session.select_dropdown_option(this.node, value);
37
+ }
38
+ }
39
+ async evaluate(page_function, ...args) {
40
+ const locator = await this.browser_session.get_locate_element(this.node);
41
+ if (!locator?.evaluate) {
42
+ throw new Error('Element evaluate is unavailable for this node');
43
+ }
44
+ return locator.evaluate(page_function, ...args);
45
+ }
46
+ }
@@ -0,0 +1,4 @@
1
+ export * from './element.js';
2
+ export * from './mouse.js';
3
+ export * from './page.js';
4
+ export * from './utils.js';
@@ -0,0 +1,4 @@
1
+ export * from './element.js';
2
+ export * from './mouse.js';
3
+ export * from './page.js';
4
+ export * from './utils.js';
@@ -0,0 +1,19 @@
1
+ import type { BrowserSession } from '../browser/session.js';
2
+ import type { MouseButton } from '../browser/events.js';
3
/**
 * Declaration of the actor-layer mouse helper.
 * Built from a BrowserSession plus an optional page reference.
 */
+ export declare class Mouse {
4
+ private readonly browser_session;
5
+ private readonly pageRef;
6
/** `pageRef` is optional and may be null. */
+ constructor(browser_session: BrowserSession, pageRef?: any | null);
7
+ private _page;
8
/**
 * Click at the given coordinates.
 * NOTE(review): the compiled mouse.js shown in this diff forwards only `button`
 * (defaulting to 'left'); `click_count` is accepted here but not forwarded — confirm intent.
 */
+ click(x: number, y: number, options?: {
9
+ button?: MouseButton;
10
+ click_count?: number;
11
+ }): Promise<void>;
12
/** Move the pointer to the given coordinates. */
+ move(x: number, y: number): Promise<void>;
13
/** Press a mouse button; the compiled implementation defaults `button` to 'left'. */
+ down(options?: {
14
+ button?: MouseButton;
15
+ }): Promise<void>;
16
/** Release a mouse button; the compiled implementation defaults `button` to 'left'. */
+ up(options?: {
17
+ button?: MouseButton;
18
+ }): Promise<void>;
19
+ }
@@ -0,0 +1,39 @@
1
+ export class Mouse {
2
+ browser_session;
3
+ pageRef;
4
+ constructor(browser_session, pageRef = null) {
5
+ this.browser_session = browser_session;
6
+ this.pageRef = pageRef;
7
+ }
8
+ async _page() {
9
+ if (this.pageRef) {
10
+ return this.pageRef;
11
+ }
12
+ return this.browser_session.get_current_page();
13
+ }
14
+ async click(x, y, options = {}) {
15
+ const button = options.button ?? 'left';
16
+ await this.browser_session.click_coordinates(x, y, { button });
17
+ }
18
+ async move(x, y) {
19
+ const page = await this._page();
20
+ if (!page?.mouse?.move) {
21
+ return;
22
+ }
23
+ await page.mouse.move(x, y);
24
+ }
25
+ async down(options = {}) {
26
+ const page = await this._page();
27
+ if (!page?.mouse?.down) {
28
+ return;
29
+ }
30
+ await page.mouse.down({ button: options.button ?? 'left' });
31
+ }
32
+ async up(options = {}) {
33
+ const page = await this._page();
34
+ if (!page?.mouse?.up) {
35
+ return;
36
+ }
37
+ await page.mouse.up({ button: options.button ?? 'left' });
38
+ }
39
+ }
@@ -0,0 +1,29 @@
1
+ import type { WaitUntilState } from '../browser/events.js';
2
+ import type { BrowserSession } from '../browser/session.js';
3
+ import { Element } from './element.js';
4
+ import { Mouse } from './mouse.js';
5
/**
 * Declaration of the actor-layer page facade built on a BrowserSession.
 */
+ export declare class Page {
6
+ private readonly browser_session;
7
+ private _mouse;
8
+ constructor(browser_session: BrowserSession);
9
/** Mouse helper associated with this page. */
+ get mouse(): Mouse;
10
/** Resolves to the underlying playwright-core page. */
+ _currentPage(): Promise<import("playwright-core").Page>;
11
+ get_url(): Promise<string>;
12
+ get_title(): Promise<string>;
13
/** Navigate to `url`; both options are optional and `timeout_ms` may be null. */
+ goto(url: string, options?: {
14
+ wait_until?: WaitUntilState;
15
+ timeout_ms?: number | null;
16
+ }): Promise<void>;
17
/** Takes the same options type as `goto`. */
+ navigate(url: string, options?: Parameters<Page['goto']>[1]): Promise<void>;
18
+ reload(): Promise<void>;
19
+ go_back(): Promise<void>;
20
+ go_forward(): Promise<void>;
21
/** `page_function` may be a source string or a function; extra arguments are passed along. */
+ evaluate(page_function: string | ((...args: unknown[]) => unknown), ...args: unknown[]): Promise<unknown>;
22
/** Resolves to a string or null; `full_page` is optional. */
+ screenshot(options?: {
23
+ full_page?: boolean;
24
+ }): Promise<string | null>;
25
+ press(key: string): Promise<void>;
26
+ set_viewport_size(width: number, height: number): Promise<void>;
27
/** Resolves to null when no element is found for `index`. */
+ get_element_by_index(index: number): Promise<Element | null>;
28
/** Non-null variant of `get_element_by_index`. */
+ must_get_element_by_index(index: number): Promise<Element>;
29
+ }
@@ -0,0 +1,88 @@
1
+ import { Element } from './element.js';
2
+ import { Mouse } from './mouse.js';
3
+ export class Page {
4
+ browser_session;
5
+ _mouse = null;
6
+ constructor(browser_session) {
7
+ this.browser_session = browser_session;
8
+ }
9
+ get mouse() {
10
+ if (!this._mouse) {
11
+ this._mouse = new Mouse(this.browser_session);
12
+ }
13
+ return this._mouse;
14
+ }
15
+ async _currentPage() {
16
+ const page = await this.browser_session.get_current_page();
17
+ if (!page) {
18
+ throw new Error('No active page available');
19
+ }
20
+ return page;
21
+ }
22
+ async get_url() {
23
+ const page = await this._currentPage();
24
+ return typeof page.url === 'function' ? page.url() : '';
25
+ }
26
+ async get_title() {
27
+ const page = await this._currentPage();
28
+ return typeof page.title === 'function' ? page.title() : '';
29
+ }
30
+ async goto(url, options = {}) {
31
+ await this.browser_session.navigate_to(url, {
32
+ wait_until: options.wait_until,
33
+ timeout_ms: options.timeout_ms,
34
+ });
35
+ }
36
+ async navigate(url, options = {}) {
37
+ await this.goto(url, options);
38
+ }
39
+ async reload() {
40
+ await this.browser_session.refresh();
41
+ }
42
+ async go_back() {
43
+ await this.browser_session.go_back();
44
+ }
45
+ async go_forward() {
46
+ await this.browser_session.go_forward();
47
+ }
48
+ async evaluate(page_function, ...args) {
49
+ const page = await this._currentPage();
50
+ if (typeof page_function === 'function') {
51
+ return page.evaluate(page_function, ...args);
52
+ }
53
+ if (args.length === 0) {
54
+ return page.evaluate(page_function);
55
+ }
56
+ const expression = `(${page_function})(${args
57
+ .map((arg) => JSON.stringify(arg))
58
+ .join(',')})`;
59
+ return page.evaluate(expression);
60
+ }
61
+ async screenshot(options = {}) {
62
+ return this.browser_session.take_screenshot(options.full_page ?? false);
63
+ }
64
+ async press(key) {
65
+ await this.browser_session.send_keys(key);
66
+ }
67
+ async set_viewport_size(width, height) {
68
+ const page = await this._currentPage();
69
+ if (!page.setViewportSize) {
70
+ return;
71
+ }
72
+ await page.setViewportSize({ width, height });
73
+ }
74
+ async get_element_by_index(index) {
75
+ const node = await this.browser_session.get_dom_element_by_index(index);
76
+ if (!node) {
77
+ return null;
78
+ }
79
+ return new Element(this.browser_session, node);
80
+ }
81
+ async must_get_element_by_index(index) {
82
+ const element = await this.get_element_by_index(index);
83
+ if (!element) {
84
+ throw new Error(`Element not found for index ${index}`);
85
+ }
86
+ return element;
87
+ }
88
+ }
@@ -0,0 +1,4 @@
1
/** Declaration of the static key-lookup helper. */
+ export declare class Utils {
2
/** Maps a key name to a `[string, number | null]` tuple; the number slot may be null. */
+ static get_key_info(key: string): [string, number | null];
3
+ }
4
/** Function export with the same signature as `Utils.get_key_info`. */
+ export declare const get_key_info: (key: string) => [string, number | null];
@@ -0,0 +1,35 @@
1
+ export class Utils {
2
+ static get_key_info(key) {
3
+ const key_map = {
4
+ Backspace: ['Backspace', 8],
5
+ Tab: ['Tab', 9],
6
+ Enter: ['Enter', 13],
7
+ Escape: ['Escape', 27],
8
+ Space: ['Space', 32],
9
+ ArrowLeft: ['ArrowLeft', 37],
10
+ ArrowUp: ['ArrowUp', 38],
11
+ ArrowRight: ['ArrowRight', 39],
12
+ ArrowDown: ['ArrowDown', 40],
13
+ Delete: ['Delete', 46],
14
+ Shift: ['ShiftLeft', 16],
15
+ Control: ['ControlLeft', 17],
16
+ Alt: ['AltLeft', 18],
17
+ Meta: ['MetaLeft', 91],
18
+ };
19
+ const direct = key_map[key];
20
+ if (direct) {
21
+ return direct;
22
+ }
23
+ if (key.length === 1) {
24
+ if (/[a-z]/i.test(key)) {
25
+ const upper = key.toUpperCase();
26
+ return [`Key${upper}`, upper.charCodeAt(0)];
27
+ }
28
+ if (/[0-9]/.test(key)) {
29
+ return [`Digit${key}`, key.charCodeAt(0)];
30
+ }
31
+ }
32
+ return [key, null];
33
+ }
34
+ }
35
+ export const get_key_info = (key) => Utils.get_key_info(key);
@@ -261,4 +261,22 @@ export declare class CreateAgentSessionEvent extends BaseEvent {
261
261
  device_id: string | null;
262
262
  };
263
263
  }
264
/**
 * Declaration of the event describing an update to an agent session.
 * Extends BaseEvent and adds three nullable fields.
 */
+ export declare class UpdateAgentSessionEvent extends BaseEvent {
265
+ browser_session_stopped: boolean | null;
266
+ browser_session_stopped_at: Date | null;
267
+ end_reason: string | null;
268
/** `id` is required; `user_id` and all event fields are optional. */
+ constructor(init: Partial<UpdateAgentSessionEvent> & {
269
+ id: string;
270
+ user_id?: string;
271
+ });
272
/** Serialised form; `browser_session_stopped_at` is a string (or null) here rather than a Date. */
+ toJSON(): {
273
+ browser_session_stopped: boolean | null;
274
+ browser_session_stopped_at: string | null;
275
+ end_reason: string | null;
276
+ event_type: string;
277
+ id: string;
278
+ user_id: string;
279
+ device_id: string | null;
280
+ };
281
+ }
264
282
  export {};
@@ -1,11 +1,17 @@
1
1
  import fs from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { uuid7str } from '../utils.js';
4
+ import { createLogger } from '../logging-config.js';
4
5
  const MAX_STRING_LENGTH = 100_000;
5
6
  const MAX_URL_LENGTH = 100_000;
6
7
  const MAX_TASK_LENGTH = 100_000;
7
8
  const MAX_COMMENT_LENGTH = 2_000;
8
9
  const MAX_FILE_CONTENT_SIZE = 50 * 1024 * 1024;
10
+ const MAX_LLM_MODEL_LENGTH = 200;
11
+ const MAX_END_REASON_LENGTH = 100;
12
+ const logger = createLogger('browser_use.agent.cloud_events');
13
/**
 * Estimate how many bytes a base64 string decodes to.
 *
 * Trailing `=` padding characters carry no data, so they are subtracted;
 * without that, a padded payload is over-estimated by one or two bytes and a
 * payload sitting exactly on a size limit would be reported as exceeding it.
 *
 * @param value Base64 text without any `data:` prefix.
 * @returns Decoded size in bytes.
 */
const estimateBase64DecodedBytes = (value) => {
    let padding = 0;
    if (value.endsWith('==')) {
        padding = 2;
    }
    else if (value.endsWith('=')) {
        padding = 1;
    }
    return Math.floor((value.length * 3) / 4) - padding;
};
14
/**
 * Return everything after the first comma of `value` (e.g. the data part of a
 * `data:...;base64,<payload>` string), or `value` unchanged when it has no comma.
 */
const extractBase64Payload = (value) => {
    const firstComma = value.indexOf(',');
    if (firstComma === -1) {
        return value;
    }
    return value.slice(firstComma + 1);
};
9
15
  const getDeviceId = (agent) => agent.cloud_sync?.auth_client?.device_id ?? null;
10
16
  const getBrowserProfile = (agent) => agent.browser_profile ?? agent.browser_session?.browser_profile ?? null;
11
17
  const serializeAgentState = (agent) => {
@@ -105,7 +111,17 @@ export class CreateAgentOutputFileEvent extends BaseEvent {
105
111
  super('CreateAgentOutputFileEvent', init);
106
112
  this.task_id = init.task_id;
107
113
  this.file_name = init.file_name;
108
- this.file_content = init.file_content ?? null;
114
+ if (init.file_content != null) {
115
+ const payload = extractBase64Payload(init.file_content);
116
+ const estimatedSize = estimateBase64DecodedBytes(payload);
117
+ if (estimatedSize > MAX_FILE_CONTENT_SIZE) {
118
+ throw new Error(`file_content exceeds maximum size of ${MAX_FILE_CONTENT_SIZE} bytes`);
119
+ }
120
+ this.file_content = init.file_content;
121
+ }
122
+ else {
123
+ this.file_content = null;
124
+ }
109
125
  this.content_type = init.content_type ?? null;
110
126
  this.created_at = init.created_at ?? new Date();
111
127
  }
@@ -156,6 +172,13 @@ export class CreateAgentStepEvent extends BaseEvent {
156
172
  this.memory = init.memory;
157
173
  this.next_goal = init.next_goal;
158
174
  this.actions = init.actions;
175
+ if (init.screenshot_url?.startsWith('data:')) {
176
+ const payload = extractBase64Payload(init.screenshot_url);
177
+ const estimatedSize = estimateBase64DecodedBytes(payload);
178
+ if (estimatedSize > MAX_FILE_CONTENT_SIZE) {
179
+ throw new Error(`screenshot_url exceeds maximum size of ${MAX_FILE_CONTENT_SIZE} bytes`);
180
+ }
181
+ }
159
182
  this.screenshot_url = init.screenshot_url ?? null;
160
183
  this.url = init.url;
161
184
  }
@@ -164,6 +187,12 @@ export class CreateAgentStepEvent extends BaseEvent {
164
187
  const screenshot = browser_state_summary.screenshot
165
188
  ? `data:image/png;base64,${browser_state_summary.screenshot}`
166
189
  : null;
190
+ if (browser_state_summary.screenshot) {
191
+ logger.debug(`Including screenshot in CreateAgentStepEvent, length: ${browser_state_summary.screenshot.length}`);
192
+ }
193
+ else {
194
+ logger.debug('No screenshot in browser_state_summary for CreateAgentStepEvent');
195
+ }
167
196
  return new CreateAgentStepEvent({
168
197
  device_id: getDeviceId(agent),
169
198
  agent_task_id: String(agent.task_id),
@@ -208,6 +237,12 @@ export class CreateAgentTaskEvent extends BaseEvent {
208
237
  constructor(init) {
209
238
  super('CreateAgentTaskEvent', init);
210
239
  this.agent_session_id = init.agent_session_id;
240
+ if (init.llm_model.length > MAX_LLM_MODEL_LENGTH) {
241
+ throw new Error(`llm_model exceeds maximum length of ${MAX_LLM_MODEL_LENGTH}`);
242
+ }
243
+ if (init.task.length > MAX_TASK_LENGTH) {
244
+ throw new Error(`task exceeds maximum length of ${MAX_TASK_LENGTH}`);
245
+ }
211
246
  this.llm_model = init.llm_model;
212
247
  this.task = init.task;
213
248
  this.stopped = init.stopped ?? false;
@@ -227,7 +262,7 @@ export class CreateAgentTaskEvent extends BaseEvent {
227
262
  id: String(agent.task_id),
228
263
  device_id: getDeviceId(agent),
229
264
  agent_session_id: String(agent.session_id),
230
- task: agent.task.slice(0, MAX_TASK_LENGTH),
265
+ task: agent.task,
231
266
  llm_model: agent.llm.model_name || agent.llm.model || 'unknown',
232
267
  agent_state: serializeAgentState(agent),
233
268
  stopped: false,
@@ -316,3 +351,31 @@ export class CreateAgentSessionEvent extends BaseEvent {
316
351
  };
317
352
  }
318
353
  }
354
/**
 * Event carrying an update to an agent session: whether and when the browser
 * session stopped, and an optional end reason.
 */
export class UpdateAgentSessionEvent extends BaseEvent {
    browser_session_stopped;
    browser_session_stopped_at;
    end_reason;

    /**
     * Missing fields default to null. A non-null `end_reason` is coerced to a
     * string and rejected when longer than MAX_END_REASON_LENGTH.
     */
    constructor(init) {
        super('UpdateAgentSessionEvent', init);
        this.browser_session_stopped = init.browser_session_stopped ?? null;
        this.browser_session_stopped_at = init.browser_session_stopped_at ?? null;
        let normalizedReason = null;
        if (init.end_reason != null) {
            normalizedReason = String(init.end_reason);
            if (normalizedReason.length > MAX_END_REASON_LENGTH) {
                throw new Error(`end_reason exceeds maximum length of ${MAX_END_REASON_LENGTH}`);
            }
        }
        this.end_reason = normalizedReason;
    }

    /** Base event fields plus this event's fields; the stop time is emitted as an ISO string or null. */
    toJSON() {
        const base = super.toJSON();
        const stoppedAt = this.browser_session_stopped_at;
        return {
            ...base,
            browser_session_stopped: this.browser_session_stopped,
            browser_session_stopped_at: stoppedAt?.toISOString() ?? null,
            end_reason: this.end_reason,
        };
    }
}
@@ -1,5 +1,6 @@
1
1
  import type { AgentHistoryList } from './views.js';
2
2
  export declare const decode_unicode_escapes_to_utf8: (text: string) => string;
3
+ export declare const is_valid_gif_screenshot_candidate: (screenshot: string | null | undefined, pageUrl: string | null | undefined) => boolean;
3
4
  export interface HistoryGifOptions {
4
5
  output_path?: string;
5
6
  duration?: number;
package/dist/agent/gif.js CHANGED
@@ -4,6 +4,7 @@ import { createCanvas, loadImage } from 'canvas';
4
4
  import GIFEncoder from 'gif-encoder-2';
5
5
  import { createLogger } from '../logging-config.js';
6
6
  import { PLACEHOLDER_4PX_SCREENSHOT } from '../browser/views.js';
7
+ import { is_new_tab_page } from '../utils.js';
7
8
  const logger = createLogger('browser_use.agent.gif');
8
9
  export const decode_unicode_escapes_to_utf8 = (text) => {
9
10
  if (!text.includes('\\u')) {
@@ -29,6 +30,8 @@ const loadScreenshot = async (screenshot) => {
29
30
  return loadImage(normalized);
30
31
  };
31
32
  const FONT_CANDIDATES = [
33
+ '"PingFang"',
34
+ '"STHeiti Medium"',
32
35
  '"Microsoft YaHei"',
33
36
  '"SimHei"',
34
37
  '"SimSun"',
@@ -38,6 +41,12 @@ const FONT_CANDIDATES = [
38
41
  '"sans-serif"',
39
42
  ];
40
43
  const pickFont = () => FONT_CANDIDATES.join(', ');
44
/**
 * Decide whether a screenshot may be used as a GIF frame: it must be present,
 * must not be the 4px placeholder, and must not come from a new-tab page.
 * A missing `pageUrl` is checked as the empty string.
 */
export const is_valid_gif_screenshot_candidate = (screenshot, pageUrl) => {
    const hasRealScreenshot = Boolean(screenshot) && screenshot !== PLACEHOLDER_4PX_SCREENSHOT;
    if (hasRealScreenshot) {
        return !is_new_tab_page(pageUrl ?? '');
    }
    return false;
};
41
50
  const wrapText = (ctx, text, maxWidth) => {
42
51
  const words = decode_unicode_escapes_to_utf8(text).split(/\s+/);
43
52
  const lines = [];
@@ -158,9 +167,17 @@ export const create_history_gif = async (task, history, { output_path = 'agent_h
158
167
  return;
159
168
  }
160
169
  const screenshots = history.screenshots();
161
- const firstRealScreenshot = screenshots.find((shot) => shot && shot !== PLACEHOLDER_4PX_SCREENSHOT);
170
+ let firstRealScreenshot = null;
171
+ for (let index = 0; index < screenshots.length; index += 1) {
172
+ const screenshot = screenshots[index];
173
+ const pageUrl = history.history[index]?.state?.url ?? null;
174
+ if (is_valid_gif_screenshot_candidate(screenshot, pageUrl)) {
175
+ firstRealScreenshot = screenshot;
176
+ break;
177
+ }
178
+ }
162
179
  if (!firstRealScreenshot) {
163
- logger.warn('No valid screenshots found (all are placeholders)');
180
+ logger.warn('No valid screenshots found (all are placeholders or from new tab pages)');
164
181
  return;
165
182
  }
166
183
  const firstImage = await loadScreenshot(firstRealScreenshot);
@@ -187,6 +204,11 @@ export const create_history_gif = async (task, history, { output_path = 'agent_h
187
204
  encoder.addFrame(ctx);
188
205
  }
189
206
  for (let index = 0; index < screenshots.length; index += 1) {
207
+ const historyItem = history.history[index];
208
+ if (is_new_tab_page(historyItem?.state?.url ?? '')) {
209
+ logger.debug(`Skipping screenshot from new tab page (${historyItem?.state?.url ?? ''}) at step ${index}`);
210
+ continue;
211
+ }
190
212
  const screenshot = screenshots[index];
191
213
  if (!screenshot || screenshot === PLACEHOLDER_4PX_SCREENSHOT) {
192
214
  continue;
@@ -0,0 +1,17 @@
1
+ import { type Message } from '../llm/messages.js';
2
/**
 * Options accepted by `construct_judge_messages`.
 * `task`, `final_result`, `agent_steps` and `screenshot_paths` are required;
 * the remaining fields are optional.
 */
+ export interface ConstructJudgeMessagesOptions {
3
+ task: string;
4
+ final_result: string;
5
+ agent_steps: string[];
6
+ screenshot_paths: string[];
7
/** NOTE(review): presumably caps how many screenshots are attached — confirm in judge.js. */
+ max_images?: number;
8
+ ground_truth?: string | null;
9
/** Either a boolean or the literal 'auto'. */
+ use_vision?: boolean | 'auto';
10
+ }
11
/**
 * Options accepted by `construct_simple_judge_messages`.
 * Only `current_date` is optional.
 */
+ export interface ConstructSimpleJudgeMessagesOptions {
12
+ task: string;
13
+ final_result: string;
14
+ current_date?: string;
15
+ }
16
/** Builds a list of Message objects from the full judge options. */
+ export declare const construct_judge_messages: (options: ConstructJudgeMessagesOptions) => Message[];
17
/** Builds a list of Message objects from the reduced (simple) judge options. */
+ export declare const construct_simple_judge_messages: (options: ConstructSimpleJudgeMessagesOptions) => Message[];