@mastra/agent-browser 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1736 @@
1
+ 'use strict';
2
+
3
+ var browser = require('@mastra/core/browser');
4
+ var agentBrowser = require('agent-browser');
5
+ var tools = require('@mastra/core/tools');
6
+ var zod = require('zod');
7
+
8
+ // src/agent-browser.ts
9
/**
 * Thread manager backed by agent-browser `BrowserManager` instances.
 *
 * With scope "thread", every thread session launches and owns a dedicated
 * manager that is tracked in `threadBrowsers`. For any other scope, sessions
 * resolve to the single shared manager set via `setSharedManager()`.
 *
 * NOTE(review): `scope`, `sessions`, `savedBrowserStates`, `logger` and
 * `getSavedBrowserState()` are not defined here; they are presumably provided
 * by the `browser.ThreadManager` base class.
 */
var AgentBrowserThreadManager = class extends browser.ThreadManager {
  /** Shared browser manager; null until `setSharedManager()` is called. */
  sharedManager = null;
  /** Launch settings (`headless`, `viewport`, `cdpUrl`) read when creating per-thread browsers. */
  browserConfig;
  /** Optional async resolver applied to `browserConfig.cdpUrl` before launching. */
  resolveCdpUrl;
  /** Optional callback invoked with `(manager, threadId)` once a per-thread browser is ready. */
  onBrowserCreated;
  /** Map of thread ID to dedicated browser manager (for 'thread' scope) */
  threadBrowsers = /* @__PURE__ */ new Map();

  /**
   * @param config - Forwarded to the base class; `browserConfig`,
   *   `resolveCdpUrl` and `onBrowserCreated` are additionally kept on this instance.
   */
  constructor(config) {
    super(config);
    this.browserConfig = config.browserConfig;
    this.resolveCdpUrl = config.resolveCdpUrl;
    this.onBrowserCreated = config.onBrowserCreated;
  }

  /**
   * Set the shared browser manager (called after browser launch).
   */
  setSharedManager(manager) {
    this.sharedManager = manager;
  }

  /**
   * Clear the shared browser manager (called when browser disconnects).
   */
  clearSharedManager() {
    this.sharedManager = null;
  }

  /**
   * Get the shared browser manager.
   * @throws {Error} "Browser not launched" when no shared manager is set.
   */
  getSharedManager() {
    if (!this.sharedManager) {
      throw new Error("Browser not launched");
    }
    return this.sharedManager;
  }

  /**
   * Create a new session for a thread.
   *
   * For scope "thread" this launches a dedicated browser, registers it in
   * `threadBrowsers`, restores any saved tabs and fires `onBrowserCreated`.
   * If any of those steps fail, the browser is closed (best-effort) and the
   * original error is rethrown.
   *
   * @param threadId - Thread the session belongs to.
   * @returns The session record (`threadId`, `createdAt`, `browserState`, and
   *   `manager` for thread scope).
   */
  async createSession(threadId) {
    const savedState = this.getSavedBrowserState(threadId);
    const session = {
      threadId,
      createdAt: Date.now(),
      browserState: savedState
    };
    if (this.scope === "thread") {
      const manager = new agentBrowser.BrowserManager();
      const launchOptions = {
        headless: this.browserConfig.headless ?? true,
        viewport: this.browserConfig.viewport
      };
      if (this.browserConfig.cdpUrl && this.resolveCdpUrl) {
        launchOptions.cdpUrl = await this.resolveCdpUrl(this.browserConfig.cdpUrl);
      }
      try {
        await manager.launch(launchOptions);
      } catch (error) {
        try {
          await manager.close();
        } catch {
          // Best-effort cleanup; the launch error below is the one that matters.
        }
        throw error;
      }
      session.manager = manager;
      this.threadBrowsers.set(threadId, manager);
      try {
        if (savedState && savedState.tabs.length > 0) {
          this.logger?.debug?.(`Restoring browser state for thread ${threadId}: ${savedState.tabs.length} tabs`);
          await this.restoreBrowserState(manager, savedState);
        }
        this.onBrowserCreated?.(manager, threadId);
      } catch (error) {
        // Roll back the registration so a half-initialised browser is never reused.
        this.threadBrowsers.delete(threadId);
        session.manager = void 0;
        try {
          await manager.close();
        } catch {
          // Best-effort cleanup; rethrow the original failure.
        }
        throw error;
      }
    }
    return session;
  }

  /**
   * Restore browser state (multiple tabs) to a browser manager.
   *
   * The first tab is loaded in the manager's current page; each further tab
   * with a URL is opened via `newTab()`. Failures are logged as a warning and
   * not rethrown, so restoration is best-effort.
   *
   * @param manager - Browser manager to restore into.
   * @param state - Saved state with `tabs` and `activeTabIndex`.
   */
  async restoreBrowserState(manager, state) {
    try {
      const firstTab = state.tabs[0];
      if (firstTab?.url) {
        const page = manager.getPage();
        if (page) {
          await page.goto(firstTab.url, { waitUntil: "domcontentloaded" });
        }
      }
      for (let i = 1; i < state.tabs.length; i++) {
        const tab = state.tabs[i];
        if (tab?.url) {
          await manager.newTab();
          const page = manager.getPage();
          if (page) {
            await page.goto(tab.url, { waitUntil: "domcontentloaded" });
          }
        }
      }
      if (state.tabs.length > 1 && state.activeTabIndex >= 0 && state.activeTabIndex < state.tabs.length) {
        await manager.switchTo(state.activeTabIndex);
      }
    } catch (error) {
      this.logger?.warn?.(`Failed to restore browser state: ${error}`);
    }
  }

  /**
   * Switch to an existing session.
   * For 'thread' scope, no switching needed - each thread has its own manager.
   * For 'shared' scope, nothing to switch.
   */
  async switchToSession(_session) {
  }

  /**
   * Get the browser manager for a specific session.
   * Falls back to the shared manager when the session has no dedicated one.
   */
  getManagerForSession(session) {
    if (this.scope === "thread" && session.manager) {
      return session.manager;
    }
    return this.getSharedManager();
  }

  /**
   * Destroy a session and clean up resources.
   *
   * Only thread-scoped sessions own a browser. The tracking entry is removed
   * even when `close()` rejects, so a browser that failed to shut down cleanly
   * is not reported as active afterwards; the close error still propagates.
   */
  async doDestroySession(session) {
    if (this.scope !== "thread" || !session.manager) {
      return;
    }
    try {
      await session.manager.close();
    } finally {
      this.threadBrowsers.delete(session.threadId);
    }
  }

  /**
   * Destroy all sessions (called during browser close).
   * Close failures for individual thread browsers are logged and skipped.
   */
  async destroyAllSessions() {
    for (const [threadId, manager] of this.threadBrowsers) {
      try {
        await manager.close();
      } catch {
        this.logger?.debug?.(`Failed to close browser for thread: ${threadId}`);
      }
    }
    this.threadBrowsers.clear();
    await super.destroyAllSessions();
  }

  /**
   * Check if any thread browsers are still running.
   */
  hasActiveThreadBrowsers() {
    return this.threadBrowsers.size > 0;
  }

  /**
   * Get the browser manager for an existing thread session without creating a new one.
   * Returns null if no session exists for the thread.
   */
  getExistingManagerForThread(threadId) {
    if (this.scope === "thread") {
      return this.threadBrowsers.get(threadId) ?? null;
    }
    return this.sharedManager;
  }

  /**
   * Clear all session tracking without closing browsers.
   * Used when browsers have been externally closed and we just need to reset state.
   */
  clearAllSessions() {
    this.threadBrowsers.clear();
    this.sessions.clear();
  }

  /**
   * Clear a specific thread's session without closing the browser.
   * Used when a thread's browser has been externally closed.
   * Preserves the browser state for potential restoration.
   * @param threadId - The thread ID to clear
   */
  clearSession(threadId) {
    const session = this.sessions.get(threadId);
    if (session?.browserState) {
      this.savedBrowserStates.set(threadId, session.browserState);
    }
    this.threadBrowsers.delete(threadId);
    this.sessions.delete(threadId);
  }
};
198
/** Input for the goto tool: target URL plus optional wait condition and timeout. */
var gotoInputSchema = zod.z.object({
  url: zod.z.string().describe("The URL to navigate to"),
  waitUntil: zod.z
    .enum(["load", "domcontentloaded", "networkidle"])
    .optional()
    .describe("When to consider navigation complete (default: domcontentloaded)"),
  timeout: zod.z.number().optional().describe("Navigation timeout in milliseconds")
});

/** Input for the snapshot tool. */
var snapshotInputSchema = zod.z.object({
  interactiveOnly: zod.z
    .boolean()
    .optional()
    .describe("Only include interactive elements (default: true)"),
  maxDepth: zod.z.number().optional().describe("Maximum depth of the tree to return")
});

/** Input for the click tool. */
var clickInputSchema = zod.z.object({
  ref: zod.z.string().describe("Element ref from snapshot (e.g., @e5)"),
  button: zod.z
    .enum(["left", "right", "middle"])
    .optional()
    .describe("Mouse button (default: left)"),
  clickCount: zod.z
    .number()
    .optional()
    .describe("Number of clicks (default: 1, use 2 for double-click)"),
  modifiers: zod.z
    .array(zod.z.enum(["Alt", "Control", "Meta", "Shift"]))
    .optional()
    .describe("Modifier keys to hold")
});

/** Input for the type tool. */
var typeInputSchema = zod.z.object({
  ref: zod.z.string().describe("Element ref from snapshot"),
  text: zod.z.string().describe("Text to type"),
  clear: zod.z
    .boolean()
    .optional()
    .describe("Clear existing content before typing (default: false)"),
  delay: zod.z.number().optional().describe("Delay between keystrokes in ms")
});

/** Input for the press tool. */
var pressInputSchema = zod.z.object({
  key: zod.z.string().describe("Key to press (e.g., Enter, Tab, Escape, Control+a)"),
  modifiers: zod.z
    .array(zod.z.enum(["Alt", "Control", "Meta", "Shift"]))
    .optional()
    .describe("Modifier keys to hold")
});

/** Input for the select tool; at least one of value/label/index must be supplied. */
var selectInputSchema = zod.z
  .object({
    ref: zod.z.string().describe("Select element ref from snapshot"),
    value: zod.z.string().optional().describe("Option value to select"),
    label: zod.z.string().optional().describe("Option label to select"),
    index: zod.z.number().int().min(0).optional().describe("Option index to select (0-based)")
  })
  .superRefine(({ value, label, index }, ctx) => {
    const hasCriterion = value !== void 0 || label !== void 0 || index !== void 0;
    if (hasCriterion) {
      return;
    }
    ctx.addIssue({
      code: zod.z.ZodIssueCode.custom,
      message: "At least one of value, label, or index is required"
    });
  });

/** Input for the scroll tool. */
var scrollInputSchema = zod.z.object({
  direction: zod.z.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
  amount: zod.z.number().optional().describe("Scroll amount in pixels (default: 300)"),
  ref: zod.z
    .string()
    .optional()
    .describe("Element ref to scroll (scrolls page if omitted)")
});

/** Input for the close tool (no fields). */
var closeInputSchema = zod.z.object({});

/** Input for the hover tool. */
var hoverInputSchema = zod.z.object({
  ref: zod.z.string().describe("Element ref from snapshot")
});

/** Input for the back tool (no fields). */
var backInputSchema = zod.z.object({});

/** Input for the dialog tool. */
var dialogInputSchema = zod.z.object({
  triggerRef: zod.z.string().describe("Element ref that triggers the dialog (e.g., @e5)"),
  action: zod.z.enum(["accept", "dismiss"]).describe("Accept or dismiss the dialog"),
  text: zod.z.string().optional().describe("Text to enter for prompt dialogs")
});

/** Input for the wait tool. */
var waitInputSchema = zod.z.object({
  ref: zod.z.string().optional().describe("Element ref to wait for"),
  state: zod.z
    .enum(["visible", "hidden", "attached", "detached"])
    .optional()
    .describe("State to wait for (default: visible)"),
  timeout: zod.z.number().optional().describe("Maximum wait time in ms (default: 30000)")
});

/** Input for the tabs tool; `index` is mandatory for the "switch" action. */
var tabsInputSchema = zod.z
  .object({
    action: zod.z.enum(["list", "new", "switch", "close"]).describe("Tab action"),
    index: zod.z.number().int().min(0).optional().describe("Tab index for switch/close"),
    url: zod.z.string().optional().describe("URL to open in new tab")
  })
  .superRefine(({ action, index }, ctx) => {
    const switchWithoutIndex = action === "switch" && index === void 0;
    if (!switchWithoutIndex) {
      return;
    }
    ctx.addIssue({
      code: zod.z.ZodIssueCode.custom,
      path: ["index"],
      message: 'index is required when action is "switch"'
    });
  });

/** Input for the drag tool; source and target each need a ref or a CSS selector. */
var dragInputSchema = zod.z
  .object({
    sourceRef: zod.z.string().optional().describe("Element ref to drag from (e.g., @e5)"),
    targetRef: zod.z.string().optional().describe("Element ref to drag to (e.g., @e7)"),
    sourceSelector: zod.z
      .string()
      .optional()
      .describe("CSS selector for source element (use if ref not available)"),
    targetSelector: zod.z
      .string()
      .optional()
      .describe("CSS selector for target element (use if ref not available)")
  })
  .superRefine(({ sourceRef, sourceSelector, targetRef, targetSelector }, ctx) => {
    const hasSource = Boolean(sourceRef || sourceSelector);
    const hasTarget = Boolean(targetRef || targetSelector);
    if (!hasSource) {
      ctx.addIssue({
        code: zod.z.ZodIssueCode.custom,
        path: ["sourceRef"],
        message: "Either sourceRef or sourceSelector is required"
      });
    }
    if (!hasTarget) {
      ctx.addIssue({
        code: zod.z.ZodIssueCode.custom,
        path: ["targetRef"],
        message: "Either targetRef or targetSelector is required"
      });
    }
  });

/** Input for the evaluate tool. */
var evaluateInputSchema = zod.z.object({
  script: zod.z.string().describe("JavaScript code to execute"),
  arg: zod.z
    .unknown()
    .optional()
    .describe("Argument to pass to the script (JSON-serializable)")
});

/** Lookup of every tool input schema, keyed by short tool name. */
var browserSchemas = {
  // Core
  goto: gotoInputSchema,
  snapshot: snapshotInputSchema,
  click: clickInputSchema,
  type: typeInputSchema,
  press: pressInputSchema,
  select: selectInputSchema,
  scroll: scrollInputSchema,
  close: closeInputSchema,
  // Extended
  hover: hoverInputSchema,
  back: backInputSchema,
  dialog: dialogInputSchema,
  wait: waitInputSchema,
  tabs: tabsInputSchema,
  drag: dragInputSchema,
  // Escape hatch
  evaluate: evaluateInputSchema
};
314
+
315
+ // src/tools/constants.ts
316
/**
 * Tool identifiers used as the `id` of each browser tool and as the keys of
 * the tool map returned by `createAgentBrowserTools`.
 */
var BROWSER_TOOLS = {
  // Core tools
  GOTO: "browser_goto",
  SNAPSHOT: "browser_snapshot",
  CLICK: "browser_click",
  TYPE: "browser_type",
  PRESS: "browser_press",
  SELECT: "browser_select",
  SCROLL: "browser_scroll",
  CLOSE: "browser_close",

  // Extended tools
  HOVER: "browser_hover",
  BACK: "browser_back",
  DIALOG: "browser_dialog",
  WAIT: "browser_wait",
  TABS: "browser_tabs",
  DRAG: "browser_drag",

  // Escape hatch
  EVALUATE: "browser_evaluate"
};
336
+
337
+ // src/tools/back.ts
338
/**
 * Build the `browser_back` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then delegates to `browser.back`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createBackTool(browser) {
  const execute = async (_input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.back(threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.BACK,
    description: "Go back to the previous page in browser history.",
    inputSchema: backInputSchema,
    execute
  });
}
351
/**
 * Build the `browser_click` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.click`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createClickTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.click(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.CLICK,
    description: "Click an element using its ref from a snapshot. Use clickCount: 2 for double-click.",
    inputSchema: clickInputSchema,
    execute
  });
}
364
/**
 * Build the `browser_close` tool.
 *
 * With scope "shared" the whole browser is closed. For any other scope only
 * the calling thread's session is closed, which requires `agent.threadId`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createCloseTool(browser) {
  const execute = async (_input, context) => {
    const threadId = context.agent?.threadId;
    if (browser.getScope() === "shared") {
      await browser.close();
      return { success: true, hint: "Browser closed. It will be re-launched automatically on next use." };
    }
    if (!threadId) {
      throw new Error("browser_close requires agent.threadId when browser scope is not shared");
    }
    await browser.closeThreadSession(threadId);
    return { success: true, hint: "Thread's browser session closed. A new session will be created on next use." };
  };
  return tools.createTool({
    id: BROWSER_TOOLS.CLOSE,
    description: "Close the browser. Only use when done with all browsing.",
    inputSchema: closeInputSchema,
    execute
  });
}
383
/**
 * Build the `browser_dialog` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.dialog`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createDialogTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.dialog(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.DIALOG,
    description: "Click an element that triggers a browser dialog (alert, confirm, prompt) and handle it. Use this instead of browser_click when you expect a dialog to appear.",
    inputSchema: dialogInputSchema,
    execute
  });
}
396
/**
 * Build the `browser_drag` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.drag`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createDragTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.drag(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.DRAG,
    description: "Drag an element to another element. Use refs from snapshot when available, or CSS selectors for elements not exposed in the accessibility tree.",
    inputSchema: dragInputSchema,
    execute
  });
}
409
/**
 * Build the `browser_evaluate` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.evaluate`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createEvaluateTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.evaluate(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.EVALUATE,
    description: "Execute JavaScript in the browser. Use for complex interactions not covered by other tools. Returns the script result.",
    inputSchema: evaluateInputSchema,
    execute
  });
}
422
/**
 * Build the `browser_goto` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.goto`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createGotoTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.goto(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.GOTO,
    description: "Navigate the browser to a URL.",
    inputSchema: gotoInputSchema,
    execute
  });
}
435
/**
 * Build the `browser_hover` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.hover`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createHoverTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.hover(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.HOVER,
    description: "Hover over an element to trigger hover states (dropdowns, tooltips).",
    inputSchema: hoverInputSchema,
    execute
  });
}
448
/**
 * Build the `browser_press` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.press`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createPressTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.press(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.PRESS,
    description: "Press a keyboard key (e.g., Enter, Tab, Escape, Control+a).",
    inputSchema: pressInputSchema,
    execute
  });
}
461
/**
 * Build the `browser_scroll` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.scroll`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createScrollTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.scroll(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.SCROLL,
    description: "Scroll the page or a specific element.",
    inputSchema: scrollInputSchema,
    execute
  });
}
474
/**
 * Build the `browser_select` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.select`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createSelectTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.select(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.SELECT,
    description: "Select an option from a dropdown by value, label, or index.",
    inputSchema: selectInputSchema,
    execute
  });
}
487
/**
 * Build the `browser_snapshot` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.snapshot`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createSnapshotTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.snapshot(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.SNAPSHOT,
    description: "Get accessibility tree snapshot of the page. Returns text-based representation with element refs like [ref=e1], [ref=e2] for targeting.",
    inputSchema: snapshotInputSchema,
    execute
  });
}
500
/**
 * Build the `browser_tabs` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.tabs`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createTabsTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.tabs(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.TABS,
    description: "Manage browser tabs: list, open new, switch, or close tabs.",
    inputSchema: tabsInputSchema,
    execute
  });
}
513
/**
 * Build the `browser_type` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.type`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createTypeTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.type(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.TYPE,
    description: "Type text into an input element. Use clear: true to replace existing content.",
    inputSchema: typeInputSchema,
    execute
  });
}
526
/**
 * Build the `browser_wait` tool.
 *
 * On execution it binds the provider to the calling agent's thread, waits for
 * the browser to be ready, then forwards the validated input to `browser.wait`.
 *
 * @param browser - Browser provider the tool delegates to.
 * @returns The tool created by `tools.createTool`.
 */
function createWaitTool(browser) {
  const execute = async (input, context) => {
    const threadId = context.agent?.threadId;
    browser.setCurrentThread(threadId);
    await browser.ensureReady();
    return browser.wait(input, threadId);
  };
  return tools.createTool({
    id: BROWSER_TOOLS.WAIT,
    description: "Wait for an element to appear, disappear, or reach a state.",
    inputSchema: waitInputSchema,
    execute
  });
}
539
+
540
+ // src/tools/index.ts
541
/**
 * Build the full tool map for a browser provider.
 *
 * @param browser - Browser provider every tool delegates to.
 * @returns Plain object mapping each `BROWSER_TOOLS` id to its tool, in
 *   registration order (core, extended, then the evaluate escape hatch).
 */
function createAgentBrowserTools(browser) {
  const registrations = [
    // Core
    [BROWSER_TOOLS.GOTO, createGotoTool],
    [BROWSER_TOOLS.SNAPSHOT, createSnapshotTool],
    [BROWSER_TOOLS.CLICK, createClickTool],
    [BROWSER_TOOLS.TYPE, createTypeTool],
    [BROWSER_TOOLS.PRESS, createPressTool],
    [BROWSER_TOOLS.SELECT, createSelectTool],
    [BROWSER_TOOLS.SCROLL, createScrollTool],
    [BROWSER_TOOLS.CLOSE, createCloseTool],
    // Extended
    [BROWSER_TOOLS.HOVER, createHoverTool],
    [BROWSER_TOOLS.BACK, createBackTool],
    [BROWSER_TOOLS.DIALOG, createDialogTool],
    [BROWSER_TOOLS.WAIT, createWaitTool],
    [BROWSER_TOOLS.TABS, createTabsTool],
    [BROWSER_TOOLS.DRAG, createDragTool],
    // Escape hatch
    [BROWSER_TOOLS.EVALUATE, createEvaluateTool]
  ];
  return Object.fromEntries(
    registrations.map(([toolId, createTool]) => [toolId, createTool(browser)])
  );
}
563
+
564
+ // src/agent-browser.ts
565
+ var AgentBrowser = class _AgentBrowser extends browser.MastraBrowser {
566
+ id;
567
+ name = "AgentBrowser";
568
+ provider = "vercel-labs/agent-browser";
569
+ /** Primary browser manager (for 'none' mode, also used as fallback) */
570
+ browserManager = null;
571
+ defaultTimeout = 3e4;
572
+ /** Active screencast streams per thread (for triggering reconnects on tab changes) */
573
+ activeScreencastStreams = /* @__PURE__ */ new Map();
574
+ /** Default key for shared scope */
575
+ static SHARED_STREAM_KEY = "__shared__";
576
+ constructor(config = {}) {
577
+ super(config);
578
+ this.id = `agent-browser-${Date.now()}`;
579
+ if (config.timeout) {
580
+ this.defaultTimeout = config.timeout;
581
+ }
582
+ let effectiveScope = config.scope ?? "thread";
583
+ if (config.cdpUrl && effectiveScope === "thread") {
584
+ this.logger.warn?.(
585
+ 'Browser scope "thread" is not supported when connecting via cdpUrl. Falling back to "shared" (shared browser connection).'
586
+ );
587
+ effectiveScope = "shared";
588
+ }
589
+ this.threadManager = new AgentBrowserThreadManager({
590
+ scope: effectiveScope,
591
+ browserConfig: config,
592
+ resolveCdpUrl: this.resolveCdpUrl.bind(this),
593
+ logger: this.logger,
594
+ // When a new thread session is created, notify listeners so screencast can start
595
+ onSessionCreated: (session) => {
596
+ this.notifyBrowserReady(session.threadId);
597
+ },
598
+ // When a new browser is created for a thread, set up close listener
599
+ onBrowserCreated: (manager, threadId) => {
600
+ this.setupCloseListenerForThread(manager, threadId);
601
+ }
602
+ });
603
+ }
604
+ // ---------------------------------------------------------------------------
605
+ // Thread Isolation (delegated to ThreadManager)
606
+ // ---------------------------------------------------------------------------
607
+ /**
608
+ * Ensure browser is ready and thread session exists.
609
+ * Creates a new page/context for the current thread if needed.
610
+ *
611
+ * For 'browser' isolation, we need to create the thread session BEFORE
612
+ * calling super.ensureReady() because the base class's ensureReady() will
613
+ * call checkBrowserAlive(), which needs at least one thread browser to exist.
614
+ */
615
+ async ensureReady() {
616
+ const scope = this.threadManager.getScope();
617
+ const threadId = this.getCurrentThread();
618
+ const existingSession = this.threadManager.hasSession(threadId);
619
+ if (scope === "thread" && threadId !== browser.DEFAULT_THREAD_ID && !existingSession) {
620
+ await this.getManagerForThread(threadId);
621
+ }
622
+ await super.ensureReady();
623
+ if (scope === "thread" && threadId !== browser.DEFAULT_THREAD_ID && existingSession) {
624
+ await this.getManagerForThread(threadId);
625
+ }
626
+ }
627
+ /**
628
+ * Get the browser manager for the current thread.
629
+ * Delegates to ThreadManager for isolation handling.
630
+ */
631
+ async getManagerForThread(threadId) {
632
+ const effectiveThreadId = threadId ?? this.getCurrentThread();
633
+ const scope = this.threadManager.getScope();
634
+ if (scope === "thread" && (!effectiveThreadId || effectiveThreadId === browser.DEFAULT_THREAD_ID)) {
635
+ const existingManager = this.threadManager.getExistingManagerForThread(effectiveThreadId);
636
+ if (existingManager) {
637
+ return existingManager;
638
+ }
639
+ }
640
+ return this.threadManager.getManagerForThread(effectiveThreadId);
641
+ }
642
+ /**
643
+ * Get the page for a specific thread.
644
+ * For thread-isolated modes, ensures we're on the correct context/page.
645
+ */
646
+ async getPageForThread(threadId) {
647
+ const manager = await this.getManagerForThread(threadId);
648
+ return manager.getPage();
649
+ }
650
+ /**
651
+ * Close a specific thread's browser session.
652
+ * Delegates to ThreadManager and notifies registered callbacks.
653
+ */
654
+ async closeThreadSession(threadId) {
655
+ await this.threadManager.destroySession(threadId);
656
+ this.notifyBrowserClosed(threadId);
657
+ }
658
+ // ---------------------------------------------------------------------------
659
+ // Lifecycle
660
+ // ---------------------------------------------------------------------------
661
+ async doLaunch() {
662
+ const scope = this.threadManager.getScope();
663
+ if (scope === "thread") {
664
+ this.browserManager = new agentBrowser.BrowserManager();
665
+ this.threadManager.setSharedManager(this.browserManager);
666
+ return;
667
+ }
668
+ this.browserManager = new agentBrowser.BrowserManager();
669
+ const localConfig = this.config;
670
+ const launchOptions = {
671
+ headless: localConfig.headless ?? true,
672
+ viewport: localConfig.viewport
673
+ };
674
+ if (localConfig.cdpUrl) {
675
+ launchOptions.cdpUrl = await this.resolveCdpUrl(localConfig.cdpUrl);
676
+ }
677
+ await this.browserManager.launch(launchOptions);
678
+ this.threadManager.setSharedManager(this.browserManager);
679
+ this.setupCloseListenerForNoneIsolation(this.browserManager);
680
+ }
681
+ /**
682
+ * Set up close event listeners for 'none' isolation shared browser.
683
+ * This handles the case where the shared browser is closed externally.
684
+ */
685
+ setupCloseListenerForNoneIsolation(manager) {
686
+ try {
687
+ let disconnectHandled = false;
688
+ const handleDisconnect = () => {
689
+ if (disconnectHandled) return;
690
+ disconnectHandled = true;
691
+ this.handleBrowserDisconnected();
692
+ };
693
+ const context = manager.getContext();
694
+ if (context) {
695
+ context.on("close", handleDisconnect);
696
+ }
697
+ const pages = manager.getPages();
698
+ for (const page of pages) {
699
+ page.on("close", () => {
700
+ const remainingPages = manager.getPages();
701
+ if (remainingPages.length === 0) {
702
+ handleDisconnect();
703
+ }
704
+ });
705
+ }
706
+ } catch {
707
+ }
708
+ }
709
+ async doClose() {
710
+ await this.threadManager.destroyAllSessions();
711
+ this.setCurrentThread(void 0);
712
+ const scope = this.threadManager.getScope();
713
+ if (scope === "shared" && this.browserManager) {
714
+ await this.browserManager.close();
715
+ }
716
+ this.browserManager = null;
717
+ }
718
+ /**
719
+ * Check if the browser is still alive by verifying the page is connected.
720
+ * Called by base class ensureReady() to detect externally closed browsers.
721
+ */
722
+ async checkBrowserAlive() {
723
+ const scope = this.threadManager.getScope();
724
+ if (scope === "thread") {
725
+ return this.threadManager.hasActiveThreadBrowsers();
726
+ }
727
+ if (!this.browserManager) {
728
+ return false;
729
+ }
730
+ try {
731
+ const page = this.browserManager.getPage();
732
+ const url = page.url();
733
+ if (url && url !== "about:blank") {
734
+ const state = await this.getBrowserState();
735
+ if (state) {
736
+ this.lastBrowserState = state;
737
+ }
738
+ }
739
+ return true;
740
+ } catch (error) {
741
+ const msg = error instanceof Error ? error.message : String(error);
742
+ if (this.isDisconnectionError(msg)) {
743
+ this.logger.debug?.("Browser was externally closed");
744
+ }
745
+ return false;
746
+ }
747
+ }
748
+ // ---------------------------------------------------------------------------
749
+ // Tools
750
+ // ---------------------------------------------------------------------------
751
+ /**
752
+ * Get the browser tools for this provider.
753
+ * Returns 17 flat tools for browser automation.
754
+ */
755
+ getTools() {
756
+ return createAgentBrowserTools(this);
757
+ }
758
+ // ---------------------------------------------------------------------------
759
+ // Helpers
760
+ // ---------------------------------------------------------------------------
761
+ /**
762
+ * Get the page for the current thread.
763
+ * Uses thread scope if enabled, otherwise returns the shared page.
764
+ * @param explicitThreadId - Optional thread ID to use instead of getCurrentThread()
765
+ * Use this to avoid race conditions in concurrent tool calls.
766
+ */
767
+ async getPage(explicitThreadId) {
768
+ const scope = this.getScope();
769
+ const threadId = explicitThreadId ?? this.getCurrentThread();
770
+ if (scope === "thread" || scope !== "shared" && threadId !== browser.DEFAULT_THREAD_ID) {
771
+ return this.getPageForThread(threadId);
772
+ }
773
+ if (!this.browserManager) throw new Error("Browser not launched");
774
+ return this.browserManager.getPage();
775
+ }
776
+ /**
777
+ * Handle browser disconnection by clearing internal state.
778
+ * For 'thread' scope, only notifies the specific thread's callbacks.
779
+ * For 'shared' scope, notifies all callbacks.
780
+ */
781
+ handleBrowserDisconnected() {
782
+ const scope = this.threadManager.getScope();
783
+ const threadId = this.getCurrentThread();
784
+ if (scope === "thread" && threadId !== browser.DEFAULT_THREAD_ID) {
785
+ this.threadManager.clearSession(threadId);
786
+ this.logger.debug?.(`Cleared browser session for thread: ${threadId}`);
787
+ this.notifyBrowserClosed(threadId);
788
+ } else {
789
+ this.browserManager = null;
790
+ this.threadManager.clearSharedManager();
791
+ super.handleBrowserDisconnected();
792
+ }
793
+ }
794
+ /**
795
+ * Set up close event listener for a thread's browser manager.
796
+ * This handles the case where a thread's browser is closed externally.
797
+ */
798
+ setupCloseListenerForThread(manager, threadId) {
799
+ try {
800
+ let disconnectHandled = false;
801
+ const handleDisconnect = () => {
802
+ if (disconnectHandled) return;
803
+ disconnectHandled = true;
804
+ this.handleThreadBrowserDisconnected(threadId);
805
+ };
806
+ const context = manager.getContext();
807
+ if (context) {
808
+ context.on("close", handleDisconnect);
809
+ }
810
+ const pages = manager.getPages();
811
+ for (const page of pages) {
812
+ page.on("close", () => {
813
+ const remainingPages = manager.getPages();
814
+ if (remainingPages.length === 0) {
815
+ handleDisconnect();
816
+ }
817
+ });
818
+ }
819
+ } catch {
820
+ }
821
+ }
822
+ /**
823
+ * Handle browser disconnection for a specific thread.
824
+ * Called when a thread's browser is closed externally.
825
+ */
826
+ handleThreadBrowserDisconnected(threadId) {
827
+ this.threadManager.clearSession(threadId);
828
+ this.logger.debug?.(`Cleared browser session for thread: ${threadId}`);
829
+ this.notifyBrowserClosed(threadId);
830
+ }
831
+ /**
832
+ * Create an error response from an exception.
833
+ * Extends base class to add agent-browser specific error handling.
834
+ */
835
+ createErrorFromException(error, context) {
836
+ const msg = error instanceof Error ? error.message : String(error);
837
+ if (msg.includes("stale") || msg.includes("Stale")) {
838
+ return this.createError(
839
+ "stale_ref",
840
+ "Element ref is no longer valid.",
841
+ "Get a fresh snapshot and use updated refs."
842
+ );
843
+ }
844
+ if (msg.includes("not found") || msg.includes("No element")) {
845
+ return this.createError(
846
+ "element_not_found",
847
+ "Element not found.",
848
+ "Check the ref is correct or get a fresh snapshot."
849
+ );
850
+ }
851
+ return super.createErrorFromException(error, context);
852
+ }
853
+ async requireLocator(ref, threadId) {
854
+ const manager = await this.getManagerForThread(threadId);
855
+ return manager.getLocatorFromRef(ref);
856
+ }
857
+ async getScrollInfo(threadId) {
858
+ const page = await this.getPage(threadId);
859
+ const info = await page.evaluate(`({
860
+ scrollY: Math.round(window.scrollY),
861
+ scrollHeight: document.documentElement.scrollHeight,
862
+ viewportHeight: window.innerHeight
863
+ })`);
864
+ if (!info || typeof info.scrollHeight !== "number") {
865
+ return {
866
+ scrollY: 0,
867
+ scrollHeight: 0,
868
+ viewportHeight: 0,
869
+ atTop: true,
870
+ atBottom: true,
871
+ percentDown: 0
872
+ };
873
+ }
874
+ const maxScroll = info.scrollHeight - info.viewportHeight;
875
+ return {
876
+ ...info,
877
+ atTop: info.scrollY < 50,
878
+ atBottom: info.scrollY >= maxScroll - 50,
879
+ percentDown: maxScroll > 0 ? Math.round(info.scrollY / maxScroll * 100) : 0
880
+ };
881
+ }
882
+ // ---------------------------------------------------------------------------
883
+ // URL Access
884
+ // ---------------------------------------------------------------------------
885
+ /**
886
+ * Get the current page URL without launching the browser.
887
+ * @param threadId - Optional thread ID for thread-isolated browsers
888
+ * @returns The current URL string, or null if browser is not running
889
+ */
890
+ async getCurrentUrl(threadId) {
891
+ if (!this.isBrowserRunning()) {
892
+ return null;
893
+ }
894
+ try {
895
+ const effectiveThreadId = threadId ?? this.getCurrentThread();
896
+ const scope = this.threadManager.getScope();
897
+ if (scope === "thread" && effectiveThreadId) {
898
+ const manager2 = this.threadManager.getExistingManagerForThread(effectiveThreadId);
899
+ if (!manager2) {
900
+ return null;
901
+ }
902
+ const url2 = manager2.getPage().url();
903
+ if (url2 && url2 !== "about:blank") {
904
+ const state = this.getBrowserStateForManager(manager2);
905
+ if (state) {
906
+ this.threadManager.updateBrowserState(effectiveThreadId, state);
907
+ }
908
+ }
909
+ return url2;
910
+ }
911
+ const manager = await this.getManagerForThread(threadId);
912
+ const url = manager.getPage().url();
913
+ if (url && url !== "about:blank") {
914
+ const state = this.getBrowserStateForManager(manager);
915
+ if (state) {
916
+ this.lastBrowserState = state;
917
+ }
918
+ }
919
+ return url;
920
+ } catch {
921
+ return null;
922
+ }
923
+ }
924
+ /**
925
+ * Navigate to a URL (simple form). Used internally for restoring state on relaunch.
926
+ */
927
+ async navigateTo(url) {
928
+ if (!this.isBrowserRunning()) {
929
+ return;
930
+ }
931
+ try {
932
+ const page = await this.getPage();
933
+ await page.goto(url, {
934
+ timeout: this.defaultTimeout,
935
+ waitUntil: "domcontentloaded"
936
+ });
937
+ } catch {
938
+ }
939
+ }
940
+ /**
941
+ * Get the current browser state (all tabs and active tab index).
942
+ */
943
+ async getBrowserState(threadId) {
944
+ if (!this.isBrowserRunning()) {
945
+ return null;
946
+ }
947
+ try {
948
+ const manager = await this.getManagerForThread(threadId);
949
+ return this.getBrowserStateForManager(manager);
950
+ } catch {
951
+ return null;
952
+ }
953
+ }
954
+ /**
955
+ * Get browser state from a specific manager instance.
956
+ */
957
+ getBrowserStateForManager(manager) {
958
+ try {
959
+ const pages = manager.getPages();
960
+ const activeIndex = manager.getActiveIndex();
961
+ const tabs = pages.map((page) => ({
962
+ url: page.url()
963
+ }));
964
+ return {
965
+ tabs,
966
+ activeTabIndex: activeIndex
967
+ };
968
+ } catch {
969
+ return null;
970
+ }
971
+ }
972
+ /**
973
+ * Get all open tabs with their URLs and titles.
974
+ */
975
+ async getTabState(threadId) {
976
+ const state = await this.getBrowserState(threadId);
977
+ return state?.tabs ?? [];
978
+ }
979
+ /**
980
+ * Get the active tab index.
981
+ */
982
+ async getActiveTabIndex(threadId) {
983
+ if (!this.isBrowserRunning()) {
984
+ return 0;
985
+ }
986
+ try {
987
+ const manager = await this.getManagerForThread(threadId);
988
+ return manager.getActiveIndex();
989
+ } catch {
990
+ return 0;
991
+ }
992
+ }
993
+ /**
994
+ * Update the browser state in the thread session.
995
+ * Called on navigation, tab open/close to keep state fresh.
996
+ */
997
+ updateSessionBrowserState(threadId) {
998
+ try {
999
+ const effectiveThreadId = threadId ?? this.getCurrentThread() ?? browser.DEFAULT_THREAD_ID;
1000
+ const scope = this.threadManager.getScope();
1001
+ let manager = null;
1002
+ if (scope === "thread") {
1003
+ manager = this.threadManager.getExistingManagerForThread(effectiveThreadId);
1004
+ } else {
1005
+ manager = this.browserManager;
1006
+ }
1007
+ if (manager) {
1008
+ const state = this.getBrowserStateForManager(manager);
1009
+ if (state) {
1010
+ this.threadManager.updateBrowserState(effectiveThreadId, state);
1011
+ }
1012
+ }
1013
+ } catch {
1014
+ }
1015
+ }
1016
+ // ---------------------------------------------------------------------------
1017
+ // 1. browser_goto - Navigate to URL
1018
+ // ---------------------------------------------------------------------------
1019
+ async goto(input, threadId) {
1020
+ try {
1021
+ const page = await this.getPage(threadId);
1022
+ await page.goto(input.url, {
1023
+ timeout: input.timeout ?? this.defaultTimeout,
1024
+ waitUntil: input.waitUntil ?? "domcontentloaded"
1025
+ });
1026
+ return {
1027
+ success: true,
1028
+ url: page.url(),
1029
+ title: await page.title(),
1030
+ hint: "Take a snapshot to see interactive elements and get refs."
1031
+ };
1032
+ } catch (error) {
1033
+ return this.createErrorFromException(error, "Goto");
1034
+ }
1035
+ }
1036
+ // ---------------------------------------------------------------------------
1037
+ // 2. browser_snapshot - Capture accessibility tree
1038
+ // ---------------------------------------------------------------------------
1039
+ async snapshot(input, threadId) {
1040
+ try {
1041
+ const manager = await this.getManagerForThread(threadId);
1042
+ const page = await this.getPage(threadId);
1043
+ const rawSnapshot = await manager.getSnapshot({
1044
+ interactive: input.interactiveOnly ?? true,
1045
+ compact: true
1046
+ });
1047
+ const snapshot = (rawSnapshot.tree ?? "").replace(/\[ref=(\w+)\]/g, "@$1");
1048
+ const scrollInfo = await this.getScrollInfo(threadId);
1049
+ let scrollText;
1050
+ if (scrollInfo.atTop && !scrollInfo.atBottom) {
1051
+ scrollText = "TOP - more content below";
1052
+ } else if (scrollInfo.atBottom) {
1053
+ scrollText = "BOTTOM of page";
1054
+ } else {
1055
+ scrollText = `${scrollInfo.percentDown}% down`;
1056
+ }
1057
+ const refs = snapshot.match(/@e\d+/g) || [];
1058
+ const elementCount = new Set(refs).size;
1059
+ return {
1060
+ success: true,
1061
+ snapshot,
1062
+ url: page.url(),
1063
+ title: await page.title(),
1064
+ elementCount,
1065
+ scroll: scrollText,
1066
+ hint: elementCount === 0 ? "No interactive elements found. Try scrolling or setting interactiveOnly:false." : void 0
1067
+ };
1068
+ } catch (error) {
1069
+ return this.createErrorFromException(error, "Snapshot");
1070
+ }
1071
+ }
1072
+ // ---------------------------------------------------------------------------
1073
+ // 3. browser_click - Click on element
1074
+ // ---------------------------------------------------------------------------
1075
+ async click(input, threadId) {
1076
+ try {
1077
+ const page = await this.getPage(threadId);
1078
+ const locator = await this.requireLocator(input.ref, threadId);
1079
+ if (!locator) {
1080
+ return this.createError(
1081
+ "stale_ref",
1082
+ `Ref ${input.ref} not found. The page has changed.`,
1083
+ "Take a new snapshot to see the current page state and get fresh refs."
1084
+ );
1085
+ }
1086
+ await locator.click({
1087
+ button: input.button ?? "left",
1088
+ clickCount: input.clickCount ?? 1,
1089
+ modifiers: input.modifiers,
1090
+ timeout: this.defaultTimeout
1091
+ });
1092
+ return {
1093
+ success: true,
1094
+ url: page.url(),
1095
+ hint: "Take a new snapshot to see updated page state and get fresh refs."
1096
+ };
1097
+ } catch (error) {
1098
+ const errorMsg = error instanceof Error ? error.message : String(error);
1099
+ if (errorMsg.includes("intercepts pointer events")) {
1100
+ return this.createError(
1101
+ "element_blocked",
1102
+ `Element ${input.ref} is blocked by another element.`,
1103
+ "Take a new snapshot to see what is blocking. Dismiss any modals or scroll the element into view."
1104
+ );
1105
+ }
1106
+ return this.createErrorFromException(error, "Click");
1107
+ }
1108
+ }
1109
+ // ---------------------------------------------------------------------------
1110
+ // 4. browser_type - Type text into element
1111
+ // ---------------------------------------------------------------------------
1112
+ async type(input, threadId) {
1113
+ try {
1114
+ const page = await this.getPage(threadId);
1115
+ const locator = await this.requireLocator(input.ref, threadId);
1116
+ if (!locator) {
1117
+ return this.createError(
1118
+ "stale_ref",
1119
+ `Ref ${input.ref} not found. The page has changed.`,
1120
+ "Take a new snapshot to see the current page state and get fresh refs."
1121
+ );
1122
+ }
1123
+ if (input.clear) {
1124
+ await locator.fill("", { timeout: this.defaultTimeout });
1125
+ }
1126
+ if (input.delay) {
1127
+ await locator.focus();
1128
+ for (const char of input.text) {
1129
+ await page.keyboard.press(char);
1130
+ await new Promise((r) => setTimeout(r, input.delay));
1131
+ }
1132
+ } else {
1133
+ await locator.fill(input.text, { timeout: this.defaultTimeout });
1134
+ }
1135
+ const value = await locator.inputValue({ timeout: 1e3 }).catch(() => input.text);
1136
+ return {
1137
+ success: true,
1138
+ value,
1139
+ url: page.url(),
1140
+ hint: "Take a new snapshot if you need to interact with more elements."
1141
+ };
1142
+ } catch (error) {
1143
+ const errorMsg = error instanceof Error ? error.message : String(error);
1144
+ if (errorMsg.includes("is not an <input>") || errorMsg.includes("not an input") || errorMsg.includes("Cannot type") || errorMsg.includes("not focusable")) {
1145
+ return this.createError(
1146
+ "not_focusable",
1147
+ `Element ${input.ref} is not a text input field.`,
1148
+ 'Take a new snapshot and look for elements with role "textbox" or "searchbox".'
1149
+ );
1150
+ }
1151
+ return this.createErrorFromException(error, "Type");
1152
+ }
1153
+ }
1154
+ // ---------------------------------------------------------------------------
1155
+ // 5. browser_press - Press keyboard key(s)
1156
+ // ---------------------------------------------------------------------------
1157
+ async press(input, threadId) {
1158
+ try {
1159
+ const page = await this.getPage(threadId);
1160
+ await page.keyboard.press(input.key);
1161
+ return {
1162
+ success: true,
1163
+ url: page.url(),
1164
+ hint: "Take a new snapshot if the page may have changed."
1165
+ };
1166
+ } catch (error) {
1167
+ return this.createErrorFromException(error, "Press");
1168
+ }
1169
+ }
1170
+ // ---------------------------------------------------------------------------
1171
+ // 6. browser_select - Select dropdown option
1172
+ // ---------------------------------------------------------------------------
1173
+ async select(input, threadId) {
1174
+ try {
1175
+ const page = await this.getPage(threadId);
1176
+ const locator = await this.requireLocator(input.ref, threadId);
1177
+ if (!locator) {
1178
+ return this.createError(
1179
+ "stale_ref",
1180
+ `Ref ${input.ref} not found. The page has changed.`,
1181
+ "Take a new snapshot to get fresh refs."
1182
+ );
1183
+ }
1184
+ const selectValue = {};
1185
+ if (input.value) selectValue.value = input.value;
1186
+ if (input.label) selectValue.label = input.label;
1187
+ if (input.index !== void 0) selectValue.index = input.index;
1188
+ const selected = await locator.selectOption(selectValue, {
1189
+ timeout: this.defaultTimeout
1190
+ });
1191
+ return {
1192
+ success: true,
1193
+ selected,
1194
+ url: page.url(),
1195
+ hint: "Selection complete. Take a snapshot if you need to continue."
1196
+ };
1197
+ } catch (error) {
1198
+ return this.createErrorFromException(error, "Select");
1199
+ }
1200
+ }
1201
+ // ---------------------------------------------------------------------------
1202
+ // 7. browser_scroll - Scroll page or element
1203
+ // ---------------------------------------------------------------------------
1204
+ async scroll(input, threadId) {
1205
+ try {
1206
+ const page = await this.getPage(threadId);
1207
+ if (input.ref) {
1208
+ const locator = await this.requireLocator(input.ref, threadId);
1209
+ if (locator) {
1210
+ await locator.scrollIntoViewIfNeeded({ timeout: this.defaultTimeout });
1211
+ }
1212
+ } else {
1213
+ const direction = input.direction;
1214
+ const amount = input.amount ?? 300;
1215
+ let deltaX = 0;
1216
+ let deltaY = 0;
1217
+ switch (direction) {
1218
+ case "up":
1219
+ deltaY = -amount;
1220
+ break;
1221
+ case "down":
1222
+ deltaY = amount;
1223
+ break;
1224
+ case "left":
1225
+ deltaX = -amount;
1226
+ break;
1227
+ case "right":
1228
+ deltaX = amount;
1229
+ break;
1230
+ }
1231
+ await page.evaluate(
1232
+ ({ x, y }) => {
1233
+ globalThis.scrollBy(x, y);
1234
+ },
1235
+ { x: deltaX, y: deltaY }
1236
+ );
1237
+ }
1238
+ const scrollInfo = await this.getScrollInfo(threadId);
1239
+ let scrollText;
1240
+ if (scrollInfo.atTop && !scrollInfo.atBottom) {
1241
+ scrollText = "TOP - more content below";
1242
+ } else if (scrollInfo.atBottom) {
1243
+ scrollText = "BOTTOM of page";
1244
+ } else {
1245
+ scrollText = `${scrollInfo.percentDown}% down`;
1246
+ }
1247
+ return {
1248
+ success: true,
1249
+ position: { x: 0, y: scrollInfo.scrollY },
1250
+ scroll: scrollText,
1251
+ hint: "Take a new snapshot to see elements in the new viewport."
1252
+ };
1253
+ } catch (error) {
1254
+ return this.createErrorFromException(error, "Scroll");
1255
+ }
1256
+ }
1257
+ // ---------------------------------------------------------------------------
1258
+ // 8. browser_hover - Hover over element
1259
+ // ---------------------------------------------------------------------------
1260
+ async hover(input, threadId) {
1261
+ try {
1262
+ const page = await this.getPage(threadId);
1263
+ const locator = await this.requireLocator(input.ref, threadId);
1264
+ if (!locator) {
1265
+ return this.createError(
1266
+ "stale_ref",
1267
+ `Ref ${input.ref} not found. The page has changed.`,
1268
+ "Take a new snapshot to get fresh refs."
1269
+ );
1270
+ }
1271
+ await locator.hover({ timeout: this.defaultTimeout });
1272
+ return {
1273
+ success: true,
1274
+ url: page.url(),
1275
+ hint: "Take a new snapshot to see any hover-triggered elements (dropdowns, tooltips)."
1276
+ };
1277
+ } catch (error) {
1278
+ return this.createErrorFromException(error, "Hover");
1279
+ }
1280
+ }
1281
+ // ---------------------------------------------------------------------------
1282
+ // 10. browser_back - Navigate back
1283
+ // ---------------------------------------------------------------------------
1284
+ async back(threadId) {
1285
+ try {
1286
+ const page = await this.getPage(threadId);
1287
+ await page.goBack({ timeout: this.defaultTimeout });
1288
+ return {
1289
+ success: true,
1290
+ url: page.url(),
1291
+ title: await page.title(),
1292
+ hint: "Take a new snapshot to see the previous page."
1293
+ };
1294
+ } catch (error) {
1295
+ return this.createErrorFromException(error, "Back");
1296
+ }
1297
+ }
1298
+ // ---------------------------------------------------------------------------
1299
+ // 11. browser_dialog - Click element that triggers dialog and handle it
1300
+ // ---------------------------------------------------------------------------
1301
+ async dialog(input, threadId) {
1302
+ try {
1303
+ const page = await this.getPage(threadId);
1304
+ const locator = await this.requireLocator(input.triggerRef, threadId);
1305
+ if (!locator) {
1306
+ return this.createError(
1307
+ "stale_ref",
1308
+ `Trigger ref ${input.triggerRef} not found.`,
1309
+ "Take a new snapshot to get fresh refs."
1310
+ );
1311
+ }
1312
+ return new Promise((resolve, reject) => {
1313
+ const timeout = setTimeout(() => {
1314
+ page.off("dialog", dialogHandler);
1315
+ reject(
1316
+ new Error(`No dialog appeared after clicking ${input.triggerRef}. The element may not trigger a dialog.`)
1317
+ );
1318
+ }, this.defaultTimeout);
1319
+ const dialogHandler = async (dialog) => {
1320
+ clearTimeout(timeout);
1321
+ try {
1322
+ const dialogType = dialog.type();
1323
+ const message = dialog.message();
1324
+ if (input.action === "accept") {
1325
+ await dialog.accept(input.text);
1326
+ } else {
1327
+ await dialog.dismiss();
1328
+ }
1329
+ resolve({
1330
+ success: true,
1331
+ action: input.action,
1332
+ dialogType,
1333
+ message,
1334
+ hint: "Dialog handled. Take a snapshot to continue."
1335
+ });
1336
+ } catch (e) {
1337
+ reject(e);
1338
+ }
1339
+ };
1340
+ page.once("dialog", dialogHandler);
1341
+ locator.click({ timeout: this.defaultTimeout }).catch((e) => {
1342
+ clearTimeout(timeout);
1343
+ page.off("dialog", dialogHandler);
1344
+ reject(e);
1345
+ });
1346
+ });
1347
+ } catch (error) {
1348
+ return this.createErrorFromException(error, "Dialog");
1349
+ }
1350
+ }
1351
+ // ---------------------------------------------------------------------------
1352
+ // 13. browser_wait - Wait for element or condition
1353
+ // ---------------------------------------------------------------------------
1354
+ async wait(input, threadId) {
1355
+ try {
1356
+ const timeout = input.timeout ?? this.defaultTimeout;
1357
+ if (input.ref) {
1358
+ const locator = await this.requireLocator(input.ref, threadId);
1359
+ if (!locator) {
1360
+ return this.createError("stale_ref", `Ref ${input.ref} not found.`, "Take a new snapshot to get fresh refs.");
1361
+ }
1362
+ const state = input.state ?? "visible";
1363
+ await locator.waitFor({ state, timeout });
1364
+ return {
1365
+ success: true,
1366
+ hint: `Element is now ${state}. Take a snapshot to continue.`
1367
+ };
1368
+ } else {
1369
+ const page = await this.getPage(threadId);
1370
+ await page.waitForTimeout(timeout);
1371
+ return {
1372
+ success: true,
1373
+ hint: "Wait complete. Take a snapshot to see current state."
1374
+ };
1375
+ }
1376
+ } catch (error) {
1377
+ return this.createErrorFromException(error, "Wait");
1378
+ }
1379
+ }
1380
+ // ---------------------------------------------------------------------------
1381
+ // 14. browser_tabs - Manage browser tabs
1382
+ // ---------------------------------------------------------------------------
1383
+ async tabs(input, threadId) {
1384
+ try {
1385
+ const browser = await this.getManagerForThread(threadId);
1386
+ if (!browser) {
1387
+ return this.createError(
1388
+ "browser_closed",
1389
+ "Browser not launched",
1390
+ "Call a navigation tool first to launch the browser."
1391
+ );
1392
+ }
1393
+ switch (input.action) {
1394
+ case "list": {
1395
+ if (!browser.listTabs) {
1396
+ return this.createError(
1397
+ "browser_error",
1398
+ "Tab management not supported",
1399
+ "This browser provider does not support tab management."
1400
+ );
1401
+ }
1402
+ const tabsList = await browser.listTabs();
1403
+ return {
1404
+ success: true,
1405
+ tabs: tabsList,
1406
+ hint: 'Use browser_tabs with action:"switch" and index to change tabs.'
1407
+ };
1408
+ }
1409
+ case "new": {
1410
+ if (!browser.newTab) {
1411
+ return this.createError(
1412
+ "browser_error",
1413
+ "Tab management not supported",
1414
+ "This browser provider does not support tab management."
1415
+ );
1416
+ }
1417
+ const result = await browser.newTab();
1418
+ if (input.url) {
1419
+ const page = await this.getPage(threadId);
1420
+ await page.goto(input.url);
1421
+ }
1422
+ this.updateSessionBrowserState(threadId);
1423
+ return {
1424
+ success: true,
1425
+ ...result,
1426
+ hint: "New tab opened. Take a snapshot to see its content."
1427
+ };
1428
+ }
1429
+ case "switch": {
1430
+ if (!browser.switchTo) {
1431
+ return this.createError(
1432
+ "browser_error",
1433
+ "Tab management not supported",
1434
+ "This browser provider does not support tab management."
1435
+ );
1436
+ }
1437
+ await browser.switchTo(input.index);
1438
+ await this.reconnectScreencast("tab switch");
1439
+ const page = browser.getPage();
1440
+ const pageUrl = page.url();
1441
+ const streamKey = this.getStreamKey(this.getCurrentThread());
1442
+ const stream = this.activeScreencastStreams.get(streamKey);
1443
+ if (pageUrl && stream?.isActive()) {
1444
+ stream.emitUrl(pageUrl);
1445
+ }
1446
+ this.updateSessionBrowserState(threadId);
1447
+ return {
1448
+ success: true,
1449
+ index: input.index,
1450
+ url: pageUrl,
1451
+ title: await page.title(),
1452
+ hint: "Tab switched. Take a snapshot to see its content."
1453
+ };
1454
+ }
1455
+ case "close": {
1456
+ if (!browser.closeTab) {
1457
+ return this.createError(
1458
+ "browser_error",
1459
+ "Tab management not supported",
1460
+ "This browser provider does not support tab management."
1461
+ );
1462
+ }
1463
+ await browser.closeTab(input.index);
1464
+ await this.reconnectScreencast("tab close");
1465
+ this.updateSessionBrowserState(threadId);
1466
+ const tabsList = await browser.listTabs?.() ?? [];
1467
+ return {
1468
+ success: true,
1469
+ remaining: tabsList.length,
1470
+ hint: tabsList.length > 0 ? "Tab closed. Take a snapshot to see current tab." : "All tabs closed."
1471
+ };
1472
+ }
1473
+ default:
1474
+ return this.createError(
1475
+ "browser_error",
1476
+ `Unknown tabs action: ${input.action}`,
1477
+ 'Use "list", "new", "switch", or "close".'
1478
+ );
1479
+ }
1480
+ } catch (error) {
1481
+ return this.createErrorFromException(error, "Tabs");
1482
+ }
1483
+ }
1484
+ // ---------------------------------------------------------------------------
1485
+ // 15. browser_drag - Drag element to target
1486
+ // ---------------------------------------------------------------------------
1487
+ async drag(input, threadId) {
1488
+ try {
1489
+ const page = await this.getPage(threadId);
1490
+ let sourceLocator = null;
1491
+ if (input.sourceRef) {
1492
+ sourceLocator = await this.requireLocator(input.sourceRef, threadId);
1493
+ } else if (input.sourceSelector) {
1494
+ sourceLocator = page.locator(input.sourceSelector);
1495
+ }
1496
+ if (!sourceLocator) {
1497
+ return this.createError(
1498
+ "stale_ref",
1499
+ input.sourceRef ? `Source ref ${input.sourceRef} not found.` : "No source element specified. Provide sourceRef or sourceSelector.",
1500
+ input.sourceRef ? "Take a new snapshot to get fresh refs, or use sourceSelector for elements not in the accessibility tree." : void 0
1501
+ );
1502
+ }
1503
+ let targetLocator = null;
1504
+ if (input.targetRef) {
1505
+ targetLocator = await this.requireLocator(input.targetRef, threadId);
1506
+ } else if (input.targetSelector) {
1507
+ targetLocator = page.locator(input.targetSelector);
1508
+ }
1509
+ if (!targetLocator) {
1510
+ return this.createError(
1511
+ "stale_ref",
1512
+ input.targetRef ? `Target ref ${input.targetRef} not found.` : "No target element specified. Provide targetRef or targetSelector.",
1513
+ input.targetRef ? "Take a new snapshot to get fresh refs, or use targetSelector for elements not in the accessibility tree." : void 0
1514
+ );
1515
+ }
1516
+ await sourceLocator.dragTo(targetLocator, { timeout: this.defaultTimeout });
1517
+ return {
1518
+ success: true,
1519
+ url: page.url(),
1520
+ hint: "Drag complete. Take a snapshot to see the result."
1521
+ };
1522
+ } catch (error) {
1523
+ return this.createErrorFromException(error, "Drag");
1524
+ }
1525
+ }
1526
+ // ---------------------------------------------------------------------------
1527
+ // 16. browser_evaluate - Execute JavaScript
1528
+ // ---------------------------------------------------------------------------
1529
+ async evaluate(input, threadId) {
1530
+ try {
1531
+ const page = await this.getPage(threadId);
1532
+ const wrappedScript = `(async () => { ${input.script} })()`;
1533
+ const result = await page.evaluate(wrappedScript);
1534
+ return {
1535
+ success: true,
1536
+ result,
1537
+ hint: "JavaScript executed. Take a snapshot if the page may have changed."
1538
+ };
1539
+ } catch (error) {
1540
+ return this.createErrorFromException(error, "Evaluate");
1541
+ }
1542
+ }
1543
+ // ---------------------------------------------------------------------------
1544
+ // 17. browser_close - Close browser
1545
+ // ---------------------------------------------------------------------------
1546
+ async closeBrowser() {
1547
+ try {
1548
+ await this.close();
1549
+ return {
1550
+ success: true,
1551
+ hint: "Browser closed. Call browser_goto to start a new session."
1552
+ };
1553
+ } catch (error) {
1554
+ return this.createErrorFromException(error, "Close");
1555
+ }
1556
+ }
1557
+ // ---------------------------------------------------------------------------
1558
+ // Screencast (for Studio live view)
1559
+ // ---------------------------------------------------------------------------
1560
+ /**
1561
+ * Get the stream key for a thread (or shared key for shared scope).
1562
+ */
1563
+ getStreamKey(threadId) {
1564
+ return threadId || _AgentBrowser.SHARED_STREAM_KEY;
1565
+ }
1566
+ /**
1567
+ * Trigger a screencast reconnect after tab changes.
1568
+ * Called internally when tabs are switched or closed.
1569
+ */
1570
+ async reconnectScreencast(_reason) {
1571
+ const threadId = this.getCurrentThread();
1572
+ const streamKey = this.getStreamKey(threadId);
1573
+ const stream = this.activeScreencastStreams.get(streamKey);
1574
+ if (stream?.isActive()) {
1575
+ await new Promise((resolve) => setTimeout(resolve, 150));
1576
+ if (stream?.isActive()) {
1577
+ try {
1578
+ await stream.reconnect();
1579
+ const manager = this.threadManager.getExistingManagerForThread(threadId) ?? this.browserManager;
1580
+ const activePage = manager?.getPage();
1581
+ if (activePage) {
1582
+ const url = activePage.url();
1583
+ if (url) {
1584
+ stream.emitUrl(url);
1585
+ }
1586
+ }
1587
+ } catch (err) {
1588
+ console.error("[AgentBrowser] Failed to reconnect screencast:", err);
1589
+ }
1590
+ }
1591
+ }
1592
+ }
1593
+ async startScreencast(_options) {
1594
+ const threadId = _options?.threadId;
1595
+ let browserManager;
1596
+ if (this.getScope() === "thread" && threadId) {
1597
+ browserManager = await this.getManagerForThread(threadId);
1598
+ } else {
1599
+ if (!this.browserManager) throw new Error("Browser not launched");
1600
+ browserManager = this.browserManager;
1601
+ }
1602
+ const provider = {
1603
+ getCdpSession: async () => {
1604
+ const currentPage = browserManager.getPage();
1605
+ if (!currentPage) {
1606
+ throw new Error("No active page available");
1607
+ }
1608
+ const cdpSession = await currentPage.context().newCDPSession(currentPage);
1609
+ return cdpSession;
1610
+ },
1611
+ isBrowserRunning: () => browserManager.isLaunched()
1612
+ };
1613
+ const stream = new browser.ScreencastStreamImpl(provider, _options);
1614
+ const streamKey = this.getStreamKey(threadId);
1615
+ this.activeScreencastStreams.set(streamKey, stream);
1616
+ const context = browserManager.getContext();
1617
+ if (context) {
1618
+ const onNewPage = (_newPage) => {
1619
+ setTimeout(() => {
1620
+ if (stream.isActive()) {
1621
+ stream.reconnect().catch(() => {
1622
+ });
1623
+ }
1624
+ }, 100);
1625
+ };
1626
+ context.on("page", onNewPage);
1627
+ const pageCloseHandlers = /* @__PURE__ */ new Map();
1628
+ const frameNavigatedHandlers = /* @__PURE__ */ new Map();
1629
+ const setupPageListeners = (page) => {
1630
+ const onFrameNavigated = (frame) => {
1631
+ if (!frame.parentFrame()) {
1632
+ stream.emitUrl(frame.url());
1633
+ this.updateSessionBrowserState(threadId);
1634
+ }
1635
+ };
1636
+ page.on("framenavigated", onFrameNavigated);
1637
+ frameNavigatedHandlers.set(page, onFrameNavigated);
1638
+ const onClose = () => {
1639
+ pageCloseHandlers.delete(page);
1640
+ const navHandler = frameNavigatedHandlers.get(page);
1641
+ if (navHandler) {
1642
+ page.off("framenavigated", navHandler);
1643
+ frameNavigatedHandlers.delete(page);
1644
+ }
1645
+ setTimeout(() => {
1646
+ const remainingPages = browserManager.getPages();
1647
+ if (stream.isActive() && remainingPages.length > 0) {
1648
+ stream.reconnect().catch(() => {
1649
+ });
1650
+ const activePage = remainingPages[browserManager.getActiveIndex()] || remainingPages[0];
1651
+ if (activePage) {
1652
+ const url = activePage.url();
1653
+ if (url && url !== "about:blank") {
1654
+ stream.emitUrl(url);
1655
+ }
1656
+ }
1657
+ }
1658
+ }, 100);
1659
+ };
1660
+ page.once("close", onClose);
1661
+ pageCloseHandlers.set(page, onClose);
1662
+ };
1663
+ const setupPageCloseListener = setupPageListeners;
1664
+ for (const page of browserManager.getPages()) {
1665
+ setupPageCloseListener(page);
1666
+ }
1667
+ const onNewPageWithCloseListener = (newPage) => {
1668
+ setupPageCloseListener(newPage);
1669
+ const url = newPage.url();
1670
+ if (url && url !== "about:blank") {
1671
+ stream.emitUrl(url);
1672
+ }
1673
+ onNewPage();
1674
+ };
1675
+ context.off("page", onNewPage);
1676
+ context.on("page", onNewPageWithCloseListener);
1677
+ stream.once("stop", () => {
1678
+ context.off("page", onNewPageWithCloseListener);
1679
+ for (const [page, handler] of pageCloseHandlers) {
1680
+ page.off("close", handler);
1681
+ }
1682
+ pageCloseHandlers.clear();
1683
+ for (const [page, handler] of frameNavigatedHandlers) {
1684
+ page.off("framenavigated", handler);
1685
+ }
1686
+ frameNavigatedHandlers.clear();
1687
+ this.activeScreencastStreams.delete(streamKey);
1688
+ });
1689
+ }
1690
+ await stream.start();
1691
+ return stream;
1692
+ }
1693
+ // ---------------------------------------------------------------------------
1694
+ // Event Injection (for Studio live view interactivity)
1695
+ // ---------------------------------------------------------------------------
1696
+ async injectMouseEvent(event, threadId) {
1697
+ const effectiveThreadId = threadId ?? this.getCurrentThread();
1698
+ const manager = await this.getManagerForThread(effectiveThreadId);
1699
+ await manager.injectMouseEvent(event);
1700
+ }
1701
+ async injectKeyboardEvent(event, threadId) {
1702
+ const effectiveThreadId = threadId ?? this.getCurrentThread();
1703
+ const manager = await this.getManagerForThread(effectiveThreadId);
1704
+ const cdp = await manager.getCDPSession();
1705
+ await cdp.send("Input.dispatchKeyEvent", {
1706
+ type: event.type,
1707
+ key: event.key,
1708
+ code: event.code,
1709
+ text: event.text,
1710
+ modifiers: event.modifiers ?? 0,
1711
+ windowsVirtualKeyCode: event.windowsVirtualKeyCode
1712
+ });
1713
+ }
1714
+ };
1715
+
1716
+ exports.AgentBrowser = AgentBrowser;
1717
+ exports.BROWSER_TOOLS = BROWSER_TOOLS;
1718
+ exports.backInputSchema = backInputSchema;
1719
+ exports.browserSchemas = browserSchemas;
1720
+ exports.clickInputSchema = clickInputSchema;
1721
+ exports.closeInputSchema = closeInputSchema;
1722
+ exports.createAgentBrowserTools = createAgentBrowserTools;
1723
+ exports.dialogInputSchema = dialogInputSchema;
1724
+ exports.dragInputSchema = dragInputSchema;
1725
+ exports.evaluateInputSchema = evaluateInputSchema;
1726
+ exports.gotoInputSchema = gotoInputSchema;
1727
+ exports.hoverInputSchema = hoverInputSchema;
1728
+ exports.pressInputSchema = pressInputSchema;
1729
+ exports.scrollInputSchema = scrollInputSchema;
1730
+ exports.selectInputSchema = selectInputSchema;
1731
+ exports.snapshotInputSchema = snapshotInputSchema;
1732
+ exports.tabsInputSchema = tabsInputSchema;
1733
+ exports.typeInputSchema = typeInputSchema;
1734
+ exports.waitInputSchema = waitInputSchema;
1735
+ //# sourceMappingURL=index.cjs.map
1736
+ //# sourceMappingURL=index.cjs.map