@mastra/agent-browser 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,31 @@
1
1
  # @mastra/agent-browser
2
2
 
3
+ ## 0.2.0
4
+
5
+ ### Minor Changes
6
+
7
+ - Added `storageState` option and `exportStorageState()` method for lightweight auth persistence (cookies and localStorage). Also kills orphaned Chrome child processes on close to prevent zombies. ([#15194](https://github.com/mastra-ai/mastra/pull/15194))
8
+
9
+ ### Patch Changes
10
+
11
+ - AgentBrowser with default thread scope now initializes correctly. Previously, calling `launch()` followed by `getPage()` would throw "Browser not launched" when no explicit thread ID was provided. ([#15285](https://github.com/mastra-ai/mastra/pull/15285))
12
+
13
+ - Updated dependencies [[`87df955`](https://github.com/mastra-ai/mastra/commit/87df955c028660c075873fd5d74af28233ce32eb), [`8fad147`](https://github.com/mastra-ai/mastra/commit/8fad14759804179c8e080ce4d9dec6ef1a808b31), [`582644c`](https://github.com/mastra-ai/mastra/commit/582644c4a87f83b4f245a84d72b9e8590585012e), [`cbdf3e1`](https://github.com/mastra-ai/mastra/commit/cbdf3e12b3d0c30a6e5347be658e2009648c130a), [`8fe46d3`](https://github.com/mastra-ai/mastra/commit/8fe46d354027f3f0f0846e64219772348de106dd), [`18c67db`](https://github.com/mastra-ai/mastra/commit/18c67dbb9c9ebc26f26f65f7d3ff836e5691ef46), [`4ba3bb1`](https://github.com/mastra-ai/mastra/commit/4ba3bb1e465ad2ddaba3bbf2bc47e0faec32985e), [`5d84914`](https://github.com/mastra-ai/mastra/commit/5d84914e0e520c642a40329b210b413fcd139898), [`8dcc77e`](https://github.com/mastra-ai/mastra/commit/8dcc77e78a5340f5848f74b9e9f1b3da3513c1f5), [`aa67fc5`](https://github.com/mastra-ai/mastra/commit/aa67fc59ee8a5eeff1f23eb05970b8d7a536c8ff), [`fd2f314`](https://github.com/mastra-ai/mastra/commit/fd2f31473d3449b6b97e837ef8641264377f41a7), [`fa8140b`](https://github.com/mastra-ai/mastra/commit/fa8140bcd4251d2e3ac85fdc5547dfc4f372b5be), [`190f452`](https://github.com/mastra-ai/mastra/commit/190f45258b0640e2adfc8219fa3258cdc5b8f071), [`e80fead`](https://github.com/mastra-ai/mastra/commit/e80fead1412cc0d1b2f7d6a1ce5017d9e0098ff7), [`0287b64`](https://github.com/mastra-ai/mastra/commit/0287b644a5c3272755cf3112e71338106664103b), [`7e7bf60`](https://github.com/mastra-ai/mastra/commit/7e7bf606886bf374a6f9d4ca9b09dd83d0533372), [`184907d`](https://github.com/mastra-ai/mastra/commit/184907d775d8609c03c26e78ccaf37315f3aa287), [`075e91a`](https://github.com/mastra-ai/mastra/commit/075e91a4549baf46ad7a42a6a8ac8dfa78cc09e6), [`0c4cd13`](https://github.com/mastra-ai/mastra/commit/0c4cd131931c04ac5405373c932a242dbe88edd6), [`b16a753`](https://github.com/mastra-ai/mastra/commit/b16a753d5748440248d7df82e29bb987a9c8386c)]:
14
+ - @mastra/core@1.25.0
15
+
16
+ ## 0.2.0-alpha.0
17
+
18
+ ### Minor Changes
19
+
20
+ - Added `storageState` option and `exportStorageState()` method for lightweight auth persistence (cookies and localStorage). Also kills orphaned Chrome child processes on close to prevent zombies. ([#15194](https://github.com/mastra-ai/mastra/pull/15194))
21
+
22
+ ### Patch Changes
23
+
24
+ - AgentBrowser with default thread scope now initializes correctly. Previously, calling `launch()` followed by `getPage()` would throw "Browser not launched" when no explicit thread ID was provided. ([#15285](https://github.com/mastra-ai/mastra/pull/15285))
25
+
26
+ - Updated dependencies [[`cbdf3e1`](https://github.com/mastra-ai/mastra/commit/cbdf3e12b3d0c30a6e5347be658e2009648c130a), [`8fe46d3`](https://github.com/mastra-ai/mastra/commit/8fe46d354027f3f0f0846e64219772348de106dd), [`18c67db`](https://github.com/mastra-ai/mastra/commit/18c67dbb9c9ebc26f26f65f7d3ff836e5691ef46), [`8dcc77e`](https://github.com/mastra-ai/mastra/commit/8dcc77e78a5340f5848f74b9e9f1b3da3513c1f5), [`aa67fc5`](https://github.com/mastra-ai/mastra/commit/aa67fc59ee8a5eeff1f23eb05970b8d7a536c8ff), [`fa8140b`](https://github.com/mastra-ai/mastra/commit/fa8140bcd4251d2e3ac85fdc5547dfc4f372b5be), [`190f452`](https://github.com/mastra-ai/mastra/commit/190f45258b0640e2adfc8219fa3258cdc5b8f071), [`7e7bf60`](https://github.com/mastra-ai/mastra/commit/7e7bf606886bf374a6f9d4ca9b09dd83d0533372), [`184907d`](https://github.com/mastra-ai/mastra/commit/184907d775d8609c03c26e78ccaf37315f3aa287), [`0c4cd13`](https://github.com/mastra-ai/mastra/commit/0c4cd131931c04ac5405373c932a242dbe88edd6), [`b16a753`](https://github.com/mastra-ai/mastra/commit/b16a753d5748440248d7df82e29bb987a9c8386c)]:
27
+ - @mastra/core@1.25.0-alpha.3
28
+
3
29
  ## 0.1.0
4
30
 
5
31
  ### Minor Changes
package/README.md ADDED
@@ -0,0 +1,138 @@
1
+ # @mastra/agent-browser
2
+
3
+ Deterministic browser automation for Mastra agents using [agent-browser](https://github.com/vercel-labs/agent-browser).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @mastra/agent-browser
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```typescript
14
+ import { Agent } from '@mastra/core/agent';
15
+ import { AgentBrowser } from '@mastra/agent-browser';
16
+
17
+ // Create an AgentBrowser instance
18
+ const browser = new AgentBrowser({
19
+ headless: true,
20
+ });
21
+
22
+ // Create an agent with the browser
23
+ const agent = new Agent({
24
+ name: 'web-agent',
25
+ instructions: `You are a web automation assistant.
26
+ Use browser_snapshot to see the page structure,
27
+ then interact with elements using their refs (e.g., @e5).`,
28
+ model: 'openai/gpt-5.4',
29
+ browser,
30
+ });
31
+
32
+ // Use the agent to browse the web
33
+ const result = await agent.generate('Go to example.com and click the first link');
34
+ ```
35
+
36
+ ## Configuration
37
+
38
+ ```typescript
39
+ const browser = new AgentBrowser({
40
+ // Run headless (default: true)
41
+ headless: true,
42
+
43
+ // Viewport dimensions
44
+ viewport: { width: 1280, height: 720 },
45
+
46
+ // Default timeout for operations in ms (default: 30000)
47
+ timeout: 30000,
48
+
49
+ // CDP URL for connecting to an existing browser
50
+ cdpUrl: 'ws://localhost:9222',
51
+
52
+ // Browser instance scope
53
+ // Default: 'thread' for local launch, 'shared' when cdpUrl is provided
54
+ // 'thread': Each thread gets its own browser
55
+ // 'shared': All threads share one browser
56
+ scope: 'thread',
57
+
58
+ // Screencast settings for Studio
59
+ screencast: {
60
+ enabled: true,
61
+ format: 'jpeg',
62
+ quality: 80,
63
+ },
64
+ });
65
+ ```
66
+
67
+ ## Tools
68
+
69
+ AgentBrowser exposes 15 deterministic tools using accessibility tree refs:
70
+
71
+ ### Core Tools
72
+
73
+ - **browser_goto** - Navigate to a URL
74
+ - **browser_snapshot** - Get accessibility tree with element refs (@e1, @e2, etc.)
75
+ - **browser_click** - Click an element by ref
76
+ - **browser_type** - Type text into an element
77
+ - **browser_press** - Press keyboard keys
78
+ - **browser_select** - Select option from dropdown
79
+ - **browser_scroll** - Scroll the page or element
80
+ - **browser_close** - Close the browser
81
+
82
+ ### Extended Tools
83
+
84
+ - **browser_hover** - Hover over an element
85
+ - **browser_back** - Go back in browser history
86
+ - **browser_dialog** - Handle browser dialogs (alert, confirm, prompt)
87
+ - **browser_wait** - Wait for element state changes
88
+ - **browser_tabs** - Manage browser tabs (list, new, switch, close)
89
+ - **browser_drag** - Drag and drop elements
90
+
91
+ ### Escape Hatch
92
+
93
+ - **browser_evaluate** - Execute JavaScript in the page context
94
+
95
+ ## How Refs Work
96
+
97
+ AgentBrowser uses accessibility tree refs for precise element targeting:
98
+
99
+ 1. Call `browser_snapshot` to get the page structure with refs
100
+ 2. Find the element you want to interact with
101
+ 3. Use its ref with other tools
102
+
103
+ ```text
104
+ [document] Example Page
105
+ [banner]
106
+ [link @e1] Home
107
+ [link @e2] About
108
+ [main]
109
+ [textbox @e3] Search...
110
+ [button @e4] Submit
111
+ ```
112
+
113
+ ```typescript
114
+ // Type in the search box
115
+ { tool: "browser_type", input: { ref: "@e3", text: "mastra" } }
116
+
117
+ // Click submit
118
+ { tool: "browser_click", input: { ref: "@e4" } }
119
+ ```
120
+
121
+ ## Comparison with StagehandBrowser
122
+
123
+ | Feature | AgentBrowser | StagehandBrowser |
124
+ | ----------- | ------------------------ | ---------------------------- |
125
+ | Approach | Deterministic refs (@e1) | Natural language |
126
+ | Token cost | Low | Higher (LLM calls) |
127
+ | Speed | Fast | Slower |
128
+ | Reliability | High (exact refs) | Variable (AI interpretation) |
129
+ | Best for | Structured workflows | Unknown/dynamic pages |
130
+
131
+ ## Documentation
132
+
133
+ - [agent-browser guide](https://mastra.ai/docs/browser/agent-browser) - Usage guide
134
+ - [AgentBrowser reference](https://mastra.ai/reference/browser/agent-browser) - API reference
135
+
136
+ ## License
137
+
138
+ Apache-2.0
package/dist/index.cjs CHANGED
@@ -37,7 +37,10 @@ var AgentBrowserThreadManager = class extends browser.ThreadManager {
37
37
  const manager = new agentBrowser.BrowserManager();
38
38
  const launchOptions = {
39
39
  headless: this.browserConfig.headless ?? true,
40
- viewport: this.browserConfig.viewport
40
+ viewport: this.browserConfig.viewport,
41
+ profile: this.browserConfig.profile,
42
+ executablePath: this.browserConfig.executablePath,
43
+ storageState: this.browserConfig.storageState
41
44
  };
42
45
  if (this.browserConfig.cdpUrl && this.resolveCdpUrl) {
43
46
  launchOptions.cdpUrl = await this.resolveCdpUrl(this.browserConfig.cdpUrl);
@@ -491,12 +494,36 @@ function createAgentBrowserTools(browser) {
491
494
  };
492
495
  }
493
496
 
497
+ // src/utils.ts
498
+ async function getBrowserPid(manager) { // Best-effort lookup: resolves to the `id` of the CDP process entry whose type is "browser", or undefined when no browser is reachable or any step throws.
499
+ try {
500
+ let browser = manager.getBrowser();
501
+ if (!browser) { // Nothing from getBrowser(): fall back to the browser exposed by the manager's context, if that context has a browser() method.
502
+ const ctx = manager.getContext();
503
+ browser = ctx?.browser?.() ?? null;
504
+ }
505
+ if (!browser) return void 0;
506
+ const cdp = await browser.newBrowserCDPSession(); // CDP session used only for this query; detached in the finally below.
507
+ try {
508
+ const info = await cdp.send("SystemInfo.getProcessInfo");
509
+ const browserProcess = info.processInfo?.find((p) => p.type === "browser");
510
+ return browserProcess?.id; // NOTE(review): presumably the OS process id as reported by the browser — confirm against the CDP docs.
511
+ } finally {
512
+ await cdp.detach().catch(() => void 0); // Detach failures are swallowed so they cannot replace the value being returned.
513
+ }
514
+ } catch { // Any failure above (manager access, session creation, CDP call) yields undefined instead of a rejection.
515
+ return void 0;
516
+ }
517
+ }
518
+
494
519
  // src/agent-browser.ts
495
520
  var AgentBrowser = class extends browser.MastraBrowser {
496
521
  id;
497
522
  name = "AgentBrowser";
498
523
  provider = "vercel-labs/agent-browser";
499
524
  defaultTimeout = 3e4;
525
+ /** Pending PID lookups — awaited in disconnect handlers to avoid racing. */
526
+ pidLookups = /* @__PURE__ */ new Set();
500
527
  constructor(config = {}) {
501
528
  super(config);
502
529
  this.id = `agent-browser-${Date.now()}`;
@@ -534,11 +561,11 @@ var AgentBrowser = class extends browser.MastraBrowser {
534
561
  const scope = this.threadManager.getScope();
535
562
  const threadId = this.getCurrentThread();
536
563
  const existingSession = this.threadManager.hasSession(threadId);
537
- if (scope === "thread" && threadId !== browser.DEFAULT_THREAD_ID && !existingSession) {
564
+ if (scope === "thread" && !existingSession) {
538
565
  await this.getManagerForThread(threadId);
539
566
  }
540
567
  await super.ensureReady();
541
- if (scope === "thread" && threadId !== browser.DEFAULT_THREAD_ID && existingSession) {
568
+ if (scope === "thread" && existingSession) {
542
569
  await this.getManagerForThread(threadId);
543
570
  }
544
571
  }
@@ -571,7 +598,10 @@ var AgentBrowser = class extends browser.MastraBrowser {
571
598
  const localConfig = this.config;
572
599
  const launchOptions = {
573
600
  headless: localConfig.headless ?? true,
574
- viewport: localConfig.viewport
601
+ viewport: localConfig.viewport,
602
+ profile: localConfig.profile,
603
+ executablePath: localConfig.executablePath,
604
+ storageState: localConfig.storageState
575
605
  };
576
606
  if (localConfig.cdpUrl) {
577
607
  launchOptions.cdpUrl = await this.resolveCdpUrl(localConfig.cdpUrl);
@@ -586,11 +616,15 @@ var AgentBrowser = class extends browser.MastraBrowser {
586
616
  */
587
617
  setupCloseListenerForSharedScope(manager) {
588
618
  try {
619
+ const pidLookup = getBrowserPid(manager).then((pid) => {
620
+ if (pid && this.sharedManager === manager) this.sharedBrowserPid = pid;
621
+ }).finally(() => this.pidLookups.delete(pidLookup));
622
+ this.pidLookups.add(pidLookup);
589
623
  let disconnectHandled = false;
590
624
  const handleDisconnect = () => {
591
625
  if (disconnectHandled) return;
592
626
  disconnectHandled = true;
593
- this.handleBrowserDisconnected();
627
+ void pidLookup.catch(() => void 0).then(() => this.handleBrowserDisconnected());
594
628
  };
595
629
  const context = manager.getContext();
596
630
  if (context) {
@@ -609,6 +643,8 @@ var AgentBrowser = class extends browser.MastraBrowser {
609
643
  }
610
644
  }
611
645
  async doClose() {
646
+ await Promise.allSettled([...this.pidLookups]);
647
+ this.pidLookups.clear();
612
648
  await this.threadManager.destroyAllSessions();
613
649
  this.setCurrentThread(void 0);
614
650
  const scope = this.threadManager.getScope();
@@ -692,11 +728,17 @@ var AgentBrowser = class extends browser.MastraBrowser {
692
728
  */
693
729
  setupCloseListenerForThread(manager, threadId) {
694
730
  try {
731
+ const pidLookup = getBrowserPid(manager).then((pid) => {
732
+ if (pid && this.threadManager?.getExistingManagerForThread(threadId) === manager) {
733
+ this.threadBrowserPids.set(threadId, pid);
734
+ }
735
+ }).finally(() => this.pidLookups.delete(pidLookup));
736
+ this.pidLookups.add(pidLookup);
695
737
  let disconnectHandled = false;
696
738
  const handleDisconnect = () => {
697
739
  if (disconnectHandled) return;
698
740
  disconnectHandled = true;
699
- this.handleThreadBrowserDisconnected(threadId);
741
+ void pidLookup.catch(() => void 0).then(() => this.handleThreadBrowserDisconnected(threadId));
700
742
  };
701
743
  const context = manager.getContext();
702
744
  if (context) {
@@ -886,6 +928,25 @@ var AgentBrowser = class extends browser.MastraBrowser {
886
928
  return 0;
887
929
  }
888
930
  }
931
+ /**
932
+ * Export the current browser session's storage state (cookies, localStorage) to a JSON file.
933
+ * This can later be loaded via the `storageState` config option to restore the session.
934
+ * Throws an Error when no manager is found for the thread, or when that manager has no browser context.
935
+ * @param path - File path to save the storage state JSON
936
+ * @param threadId - Optional thread ID (defaults to current thread)
937
+ */
938
+ async exportStorageState(path, threadId) {
939
+ const effectiveThreadId = threadId ?? this.getCurrentThread();
940
+ const manager = this.threadManager.getExistingManagerForThread(effectiveThreadId);
941
+ if (!manager) { // No manager is registered for the effective thread.
942
+ throw new Error("No browser is running. Launch a browser first before exporting storage state.");
943
+ }
944
+ const context = manager.getContext();
945
+ if (!context) { // A manager exists but currently exposes no browser context.
946
+ throw new Error("Browser context not available");
947
+ }
948
+ await context.storageState({ path }); // Delegates the write to the context — NOTE(review): presumably Playwright's BrowserContext.storageState; confirm.
949
+ }
889
950
  // ---------------------------------------------------------------------------
890
951
  // 1. browser_goto - Navigate to URL
891
952
  // ---------------------------------------------------------------------------
@@ -1564,6 +1625,7 @@ exports.createAgentBrowserTools = createAgentBrowserTools;
1564
1625
  exports.dialogInputSchema = dialogInputSchema;
1565
1626
  exports.dragInputSchema = dragInputSchema;
1566
1627
  exports.evaluateInputSchema = evaluateInputSchema;
1628
+ exports.getBrowserPid = getBrowserPid;
1567
1629
  exports.gotoInputSchema = gotoInputSchema;
1568
1630
  exports.hoverInputSchema = hoverInputSchema;
1569
1631
  exports.pressInputSchema = pressInputSchema;