@mastra/agent-browser 0.1.0 → 0.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +138 -0
- package/dist/index.cjs +68 -6
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +42 -3
- package/dist/index.d.ts +42 -3
- package/dist/index.js +68 -7
- package/dist/index.js.map +1 -1
- package/package.json +5 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# @mastra/agent-browser
|
|
2
2
|
|
|
3
|
+
## 0.2.0-alpha.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- Added `storageState` option and `exportStorageState()` method for lightweight auth persistence (cookies and localStorage). Orphaned Chrome child processes are now also killed on close to prevent zombie processes. ([#15194](https://github.com/mastra-ai/mastra/pull/15194))
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- AgentBrowser with default thread scope now initializes correctly. Previously, calling `launch()` followed by `getPage()` would throw "Browser not launched" when no explicit thread ID was provided. ([#15285](https://github.com/mastra-ai/mastra/pull/15285))
|
|
12
|
+
|
|
13
|
+
- Updated dependencies [[`cbdf3e1`](https://github.com/mastra-ai/mastra/commit/cbdf3e12b3d0c30a6e5347be658e2009648c130a), [`8fe46d3`](https://github.com/mastra-ai/mastra/commit/8fe46d354027f3f0f0846e64219772348de106dd), [`18c67db`](https://github.com/mastra-ai/mastra/commit/18c67dbb9c9ebc26f26f65f7d3ff836e5691ef46), [`8dcc77e`](https://github.com/mastra-ai/mastra/commit/8dcc77e78a5340f5848f74b9e9f1b3da3513c1f5), [`aa67fc5`](https://github.com/mastra-ai/mastra/commit/aa67fc59ee8a5eeff1f23eb05970b8d7a536c8ff), [`fa8140b`](https://github.com/mastra-ai/mastra/commit/fa8140bcd4251d2e3ac85fdc5547dfc4f372b5be), [`190f452`](https://github.com/mastra-ai/mastra/commit/190f45258b0640e2adfc8219fa3258cdc5b8f071), [`7e7bf60`](https://github.com/mastra-ai/mastra/commit/7e7bf606886bf374a6f9d4ca9b09dd83d0533372), [`184907d`](https://github.com/mastra-ai/mastra/commit/184907d775d8609c03c26e78ccaf37315f3aa287), [`0c4cd13`](https://github.com/mastra-ai/mastra/commit/0c4cd131931c04ac5405373c932a242dbe88edd6), [`b16a753`](https://github.com/mastra-ai/mastra/commit/b16a753d5748440248d7df82e29bb987a9c8386c)]:
|
|
14
|
+
- @mastra/core@1.25.0-alpha.3
|
|
15
|
+
|
|
3
16
|
## 0.1.0
|
|
4
17
|
|
|
5
18
|
### Minor Changes
|
package/README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# @mastra/agent-browser
|
|
2
|
+
|
|
3
|
+
Deterministic browser automation for Mastra agents using [agent-browser](https://github.com/vercel-labs/agent-browser).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @mastra/agent-browser
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { Agent } from '@mastra/core/agent';
|
|
15
|
+
import { AgentBrowser } from '@mastra/agent-browser';
|
|
16
|
+
|
|
17
|
+
// Create an AgentBrowser instance
|
|
18
|
+
const browser = new AgentBrowser({
|
|
19
|
+
headless: true,
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
// Create an agent with the browser
|
|
23
|
+
const agent = new Agent({
|
|
24
|
+
name: 'web-agent',
|
|
25
|
+
instructions: `You are a web automation assistant.
|
|
26
|
+
Use browser_snapshot to see the page structure,
|
|
27
|
+
then interact with elements using their refs (e.g., @e5).`,
|
|
28
|
+
model: 'openai/gpt-5.4',
|
|
29
|
+
browser,
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
// Use the agent to browse the web
|
|
33
|
+
const result = await agent.generate('Go to example.com and click the first link');
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Configuration
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
const browser = new AgentBrowser({
|
|
40
|
+
// Run headless (default: true)
|
|
41
|
+
headless: true,
|
|
42
|
+
|
|
43
|
+
// Viewport dimensions
|
|
44
|
+
viewport: { width: 1280, height: 720 },
|
|
45
|
+
|
|
46
|
+
// Default timeout for operations in ms (default: 30000)
|
|
47
|
+
timeout: 30000,
|
|
48
|
+
|
|
49
|
+
// CDP URL for connecting to an existing browser
|
|
50
|
+
cdpUrl: 'ws://localhost:9222',
|
|
51
|
+
|
|
52
|
+
// Browser instance scope
|
|
53
|
+
// Default: 'thread' for local launch, 'shared' when cdpUrl is provided
|
|
54
|
+
// 'thread': Each thread gets its own browser
|
|
55
|
+
// 'shared': All threads share one browser
|
|
56
|
+
scope: 'thread',
|
|
57
|
+
|
|
58
|
+
// Screencast settings for Studio
|
|
59
|
+
screencast: {
|
|
60
|
+
enabled: true,
|
|
61
|
+
format: 'jpeg',
|
|
62
|
+
quality: 80,
|
|
63
|
+
},
|
|
64
|
+
});
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Tools
|
|
68
|
+
|
|
69
|
+
AgentBrowser exposes 15 deterministic tools using accessibility tree refs:
|
|
70
|
+
|
|
71
|
+
### Core Tools
|
|
72
|
+
|
|
73
|
+
- **browser_goto** - Navigate to a URL
|
|
74
|
+
- **browser_snapshot** - Get accessibility tree with element refs (@e1, @e2, etc.)
|
|
75
|
+
- **browser_click** - Click an element by ref
|
|
76
|
+
- **browser_type** - Type text into an element
|
|
77
|
+
- **browser_press** - Press keyboard keys
|
|
78
|
+
- **browser_select** - Select option from dropdown
|
|
79
|
+
- **browser_scroll** - Scroll the page or element
|
|
80
|
+
- **browser_close** - Close the browser
|
|
81
|
+
|
|
82
|
+
### Extended Tools
|
|
83
|
+
|
|
84
|
+
- **browser_hover** - Hover over an element
|
|
85
|
+
- **browser_back** - Go back in browser history
|
|
86
|
+
- **browser_dialog** - Handle browser dialogs (alert, confirm, prompt)
|
|
87
|
+
- **browser_wait** - Wait for element state changes
|
|
88
|
+
- **browser_tabs** - Manage browser tabs (list, new, switch, close)
|
|
89
|
+
- **browser_drag** - Drag and drop elements
|
|
90
|
+
|
|
91
|
+
### Escape Hatch
|
|
92
|
+
|
|
93
|
+
- **browser_evaluate** - Execute JavaScript in the page context
|
|
94
|
+
|
|
95
|
+
## How Refs Work
|
|
96
|
+
|
|
97
|
+
AgentBrowser uses accessibility tree refs for precise element targeting:
|
|
98
|
+
|
|
99
|
+
1. Call `browser_snapshot` to get the page structure with refs
|
|
100
|
+
2. Find the element you want to interact with
|
|
101
|
+
3. Use its ref with other tools
|
|
102
|
+
|
|
103
|
+
```text
|
|
104
|
+
[document] Example Page
|
|
105
|
+
[banner]
|
|
106
|
+
[link @e1] Home
|
|
107
|
+
[link @e2] About
|
|
108
|
+
[main]
|
|
109
|
+
[textbox @e3] Search...
|
|
110
|
+
[button @e4] Submit
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
// Type in the search box
|
|
115
|
+
{ tool: "browser_type", input: { ref: "@e3", text: "mastra" } }
|
|
116
|
+
|
|
117
|
+
// Click submit
|
|
118
|
+
{ tool: "browser_click", input: { ref: "@e4" } }
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Comparison with StagehandBrowser
|
|
122
|
+
|
|
123
|
+
| Feature | AgentBrowser | StagehandBrowser |
|
|
124
|
+
| ----------- | ------------------------ | ---------------------------- |
|
|
125
|
+
| Approach | Deterministic refs (@e1) | Natural language |
|
|
126
|
+
| Token cost | Low | Higher (LLM calls) |
|
|
127
|
+
| Speed | Fast | Slower |
|
|
128
|
+
| Reliability | High (exact refs) | Variable (AI interpretation) |
|
|
129
|
+
| Best for | Structured workflows | Unknown/dynamic pages |
|
|
130
|
+
|
|
131
|
+
## Documentation
|
|
132
|
+
|
|
133
|
+
- [agent-browser guide](https://mastra.ai/docs/browser/agent-browser) - Usage guide
|
|
134
|
+
- [AgentBrowser reference](https://mastra.ai/reference/browser/agent-browser) - API reference
|
|
135
|
+
|
|
136
|
+
## License
|
|
137
|
+
|
|
138
|
+
Apache-2.0
|
package/dist/index.cjs
CHANGED
|
@@ -37,7 +37,10 @@ var AgentBrowserThreadManager = class extends browser.ThreadManager {
|
|
|
37
37
|
const manager = new agentBrowser.BrowserManager();
|
|
38
38
|
const launchOptions = {
|
|
39
39
|
headless: this.browserConfig.headless ?? true,
|
|
40
|
-
viewport: this.browserConfig.viewport
|
|
40
|
+
viewport: this.browserConfig.viewport,
|
|
41
|
+
profile: this.browserConfig.profile,
|
|
42
|
+
executablePath: this.browserConfig.executablePath,
|
|
43
|
+
storageState: this.browserConfig.storageState
|
|
41
44
|
};
|
|
42
45
|
if (this.browserConfig.cdpUrl && this.resolveCdpUrl) {
|
|
43
46
|
launchOptions.cdpUrl = await this.resolveCdpUrl(this.browserConfig.cdpUrl);
|
|
@@ -491,12 +494,36 @@ function createAgentBrowserTools(browser) {
|
|
|
491
494
|
};
|
|
492
495
|
}
|
|
493
496
|
|
|
497
|
+
// src/utils.ts
|
|
498
|
+
async function getBrowserPid(manager) {
|
|
499
|
+
try {
|
|
500
|
+
let browser = manager.getBrowser();
|
|
501
|
+
if (!browser) {
|
|
502
|
+
const ctx = manager.getContext();
|
|
503
|
+
browser = ctx?.browser?.() ?? null;
|
|
504
|
+
}
|
|
505
|
+
if (!browser) return void 0;
|
|
506
|
+
const cdp = await browser.newBrowserCDPSession();
|
|
507
|
+
try {
|
|
508
|
+
const info = await cdp.send("SystemInfo.getProcessInfo");
|
|
509
|
+
const browserProcess = info.processInfo?.find((p) => p.type === "browser");
|
|
510
|
+
return browserProcess?.id;
|
|
511
|
+
} finally {
|
|
512
|
+
await cdp.detach().catch(() => void 0);
|
|
513
|
+
}
|
|
514
|
+
} catch {
|
|
515
|
+
return void 0;
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
|
|
494
519
|
// src/agent-browser.ts
|
|
495
520
|
var AgentBrowser = class extends browser.MastraBrowser {
|
|
496
521
|
id;
|
|
497
522
|
name = "AgentBrowser";
|
|
498
523
|
provider = "vercel-labs/agent-browser";
|
|
499
524
|
defaultTimeout = 3e4;
|
|
525
|
+
/** Pending PID lookups — awaited in disconnect handlers to avoid racing. */
|
|
526
|
+
pidLookups = /* @__PURE__ */ new Set();
|
|
500
527
|
constructor(config = {}) {
|
|
501
528
|
super(config);
|
|
502
529
|
this.id = `agent-browser-${Date.now()}`;
|
|
@@ -534,11 +561,11 @@ var AgentBrowser = class extends browser.MastraBrowser {
|
|
|
534
561
|
const scope = this.threadManager.getScope();
|
|
535
562
|
const threadId = this.getCurrentThread();
|
|
536
563
|
const existingSession = this.threadManager.hasSession(threadId);
|
|
537
|
-
if (scope === "thread" &&
|
|
564
|
+
if (scope === "thread" && !existingSession) {
|
|
538
565
|
await this.getManagerForThread(threadId);
|
|
539
566
|
}
|
|
540
567
|
await super.ensureReady();
|
|
541
|
-
if (scope === "thread" &&
|
|
568
|
+
if (scope === "thread" && existingSession) {
|
|
542
569
|
await this.getManagerForThread(threadId);
|
|
543
570
|
}
|
|
544
571
|
}
|
|
@@ -571,7 +598,10 @@ var AgentBrowser = class extends browser.MastraBrowser {
|
|
|
571
598
|
const localConfig = this.config;
|
|
572
599
|
const launchOptions = {
|
|
573
600
|
headless: localConfig.headless ?? true,
|
|
574
|
-
viewport: localConfig.viewport
|
|
601
|
+
viewport: localConfig.viewport,
|
|
602
|
+
profile: localConfig.profile,
|
|
603
|
+
executablePath: localConfig.executablePath,
|
|
604
|
+
storageState: localConfig.storageState
|
|
575
605
|
};
|
|
576
606
|
if (localConfig.cdpUrl) {
|
|
577
607
|
launchOptions.cdpUrl = await this.resolveCdpUrl(localConfig.cdpUrl);
|
|
@@ -586,11 +616,15 @@ var AgentBrowser = class extends browser.MastraBrowser {
|
|
|
586
616
|
*/
|
|
587
617
|
setupCloseListenerForSharedScope(manager) {
|
|
588
618
|
try {
|
|
619
|
+
const pidLookup = getBrowserPid(manager).then((pid) => {
|
|
620
|
+
if (pid && this.sharedManager === manager) this.sharedBrowserPid = pid;
|
|
621
|
+
}).finally(() => this.pidLookups.delete(pidLookup));
|
|
622
|
+
this.pidLookups.add(pidLookup);
|
|
589
623
|
let disconnectHandled = false;
|
|
590
624
|
const handleDisconnect = () => {
|
|
591
625
|
if (disconnectHandled) return;
|
|
592
626
|
disconnectHandled = true;
|
|
593
|
-
this.handleBrowserDisconnected();
|
|
627
|
+
void pidLookup.catch(() => void 0).then(() => this.handleBrowserDisconnected());
|
|
594
628
|
};
|
|
595
629
|
const context = manager.getContext();
|
|
596
630
|
if (context) {
|
|
@@ -609,6 +643,8 @@ var AgentBrowser = class extends browser.MastraBrowser {
|
|
|
609
643
|
}
|
|
610
644
|
}
|
|
611
645
|
async doClose() {
|
|
646
|
+
await Promise.allSettled([...this.pidLookups]);
|
|
647
|
+
this.pidLookups.clear();
|
|
612
648
|
await this.threadManager.destroyAllSessions();
|
|
613
649
|
this.setCurrentThread(void 0);
|
|
614
650
|
const scope = this.threadManager.getScope();
|
|
@@ -692,11 +728,17 @@ var AgentBrowser = class extends browser.MastraBrowser {
|
|
|
692
728
|
*/
|
|
693
729
|
setupCloseListenerForThread(manager, threadId) {
|
|
694
730
|
try {
|
|
731
|
+
const pidLookup = getBrowserPid(manager).then((pid) => {
|
|
732
|
+
if (pid && this.threadManager?.getExistingManagerForThread(threadId) === manager) {
|
|
733
|
+
this.threadBrowserPids.set(threadId, pid);
|
|
734
|
+
}
|
|
735
|
+
}).finally(() => this.pidLookups.delete(pidLookup));
|
|
736
|
+
this.pidLookups.add(pidLookup);
|
|
695
737
|
let disconnectHandled = false;
|
|
696
738
|
const handleDisconnect = () => {
|
|
697
739
|
if (disconnectHandled) return;
|
|
698
740
|
disconnectHandled = true;
|
|
699
|
-
this.handleThreadBrowserDisconnected(threadId);
|
|
741
|
+
void pidLookup.catch(() => void 0).then(() => this.handleThreadBrowserDisconnected(threadId));
|
|
700
742
|
};
|
|
701
743
|
const context = manager.getContext();
|
|
702
744
|
if (context) {
|
|
@@ -886,6 +928,25 @@ var AgentBrowser = class extends browser.MastraBrowser {
|
|
|
886
928
|
return 0;
|
|
887
929
|
}
|
|
888
930
|
}
|
|
931
|
+
/**
|
|
932
|
+
* Export the current browser session's storage state (cookies, localStorage) to a JSON file.
|
|
933
|
+
* This can later be loaded via the `storageState` config option to restore the session.
|
|
934
|
+
*
|
|
935
|
+
* @param path - File path to save the storage state JSON
|
|
936
|
+
* @param threadId - Optional thread ID (defaults to current thread)
|
|
937
|
+
*/
|
|
938
|
+
async exportStorageState(path, threadId) {
|
|
939
|
+
const effectiveThreadId = threadId ?? this.getCurrentThread();
|
|
940
|
+
const manager = this.threadManager.getExistingManagerForThread(effectiveThreadId);
|
|
941
|
+
if (!manager) {
|
|
942
|
+
throw new Error("No browser is running. Launch a browser first before exporting storage state.");
|
|
943
|
+
}
|
|
944
|
+
const context = manager.getContext();
|
|
945
|
+
if (!context) {
|
|
946
|
+
throw new Error("Browser context not available");
|
|
947
|
+
}
|
|
948
|
+
await context.storageState({ path });
|
|
949
|
+
}
|
|
889
950
|
// ---------------------------------------------------------------------------
|
|
890
951
|
// 1. browser_goto - Navigate to URL
|
|
891
952
|
// ---------------------------------------------------------------------------
|
|
@@ -1564,6 +1625,7 @@ exports.createAgentBrowserTools = createAgentBrowserTools;
|
|
|
1564
1625
|
exports.dialogInputSchema = dialogInputSchema;
|
|
1565
1626
|
exports.dragInputSchema = dragInputSchema;
|
|
1566
1627
|
exports.evaluateInputSchema = evaluateInputSchema;
|
|
1628
|
+
exports.getBrowserPid = getBrowserPid;
|
|
1567
1629
|
exports.gotoInputSchema = gotoInputSchema;
|
|
1568
1630
|
exports.hoverInputSchema = hoverInputSchema;
|
|
1569
1631
|
exports.pressInputSchema = pressInputSchema;
|