@mastra/agent-browser 0.2.1-alpha.1 → 0.2.2-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -170,6 +170,13 @@ declare const dragInputSchema: z.ZodObject<{
170
170
  targetSelector: z.ZodOptional<z.ZodString>;
171
171
  }, z.core.$strip>;
172
172
  type DragInput = z.output<typeof dragInputSchema>;
173
+ /**
174
+ * browser_screenshot - Capture a screenshot of the current page
175
+ */
176
+ declare const screenshotInputSchema: z.ZodObject<{
177
+ fullPage: z.ZodOptional<z.ZodBoolean>;
178
+ }, z.core.$strip>;
179
+ type ScreenshotInput = z.output<typeof screenshotInputSchema>;
173
180
  /**
174
181
  * browser_evaluate - Execute JavaScript in the browser
175
182
  */
@@ -277,12 +284,38 @@ declare const browserSchemas: {
277
284
  sourceSelector: z.ZodOptional<z.ZodString>;
278
285
  targetSelector: z.ZodOptional<z.ZodString>;
279
286
  }, z.core.$strip>;
287
+ readonly screenshot: z.ZodObject<{
288
+ fullPage: z.ZodOptional<z.ZodBoolean>;
289
+ }, z.core.$strip>;
280
290
  readonly evaluate: z.ZodObject<{
281
291
  script: z.ZodString;
282
292
  arg: z.ZodOptional<z.ZodUnknown>;
283
293
  }, z.core.$strip>;
284
294
  };
285
295
 
296
+ /**
297
+ * Browser Tool Constants
298
+ */
299
+ declare const BROWSER_TOOLS: {
300
+ readonly GOTO: "browser_goto";
301
+ readonly SNAPSHOT: "browser_snapshot";
302
+ readonly CLICK: "browser_click";
303
+ readonly TYPE: "browser_type";
304
+ readonly PRESS: "browser_press";
305
+ readonly SELECT: "browser_select";
306
+ readonly SCROLL: "browser_scroll";
307
+ readonly CLOSE: "browser_close";
308
+ readonly HOVER: "browser_hover";
309
+ readonly BACK: "browser_back";
310
+ readonly DIALOG: "browser_dialog";
311
+ readonly WAIT: "browser_wait";
312
+ readonly TABS: "browser_tabs";
313
+ readonly DRAG: "browser_drag";
314
+ readonly SCREENSHOT: "browser_screenshot";
315
+ readonly EVALUATE: "browser_evaluate";
316
+ };
317
+ type BrowserToolName = (typeof BROWSER_TOOLS)[keyof typeof BROWSER_TOOLS];
318
+
286
319
  /**
287
320
  * AgentBrowser-specific configuration extensions.
288
321
  */
@@ -300,6 +333,17 @@ interface AgentBrowserConfigExtensions {
300
333
  * ```
301
334
  */
302
335
  storageState?: string;
336
+ /**
337
+ * Tool names to exclude from the browser toolset.
338
+ * Use this to disable specific tools, e.g. `['browser_screenshot']`
339
+ * to skip the screenshot tool for models that don't support vision.
340
+ *
341
+ * @example
342
+ * ```ts
343
+ * new AgentBrowser({ excludeTools: ['browser_screenshot'] })
344
+ * ```
345
+ */
346
+ excludeTools?: BrowserToolName[];
303
347
  }
304
348
  /**
305
349
  * Configuration options for AgentBrowser.
@@ -387,6 +431,7 @@ declare class AgentBrowser extends MastraBrowser {
387
431
  private pidLookups;
388
432
  /** Thread manager - narrowed type from base class */
389
433
  protected threadManager: AgentBrowserThreadManager;
434
+ private browserConfig;
390
435
  constructor(config?: BrowserConfig);
391
436
  /**
392
437
  * Ensure browser is ready and thread session exists.
@@ -416,7 +461,7 @@ declare class AgentBrowser extends MastraBrowser {
416
461
  protected checkBrowserAlive(): Promise<boolean>;
417
462
  /**
418
463
  * Get the browser tools for this provider.
419
- * Returns 17 flat tools for browser automation.
464
+ * Returns 16 flat tools for browser automation.
420
465
  */
421
466
  getTools(): Record<string, Tool<any, any>>;
422
467
  /**
@@ -497,6 +542,11 @@ declare class AgentBrowser extends MastraBrowser {
497
542
  scroll: string;
498
543
  hint?: string;
499
544
  } | BrowserToolError>;
545
+ screenshot(input: ScreenshotInput, threadId?: string): Promise<{
546
+ base64: string;
547
+ url: string;
548
+ title: string;
549
+ } | BrowserToolError>;
500
550
  click(input: ClickInput, threadId?: string): Promise<{
501
551
  success: true;
502
552
  url: string;
@@ -589,28 +639,6 @@ declare class AgentBrowser extends MastraBrowser {
589
639
  */
590
640
  declare function getBrowserPid(manager: BrowserManager): Promise<number | undefined>;
591
641
 
592
- /**
593
- * Browser Tool Constants
594
- */
595
- declare const BROWSER_TOOLS: {
596
- readonly GOTO: "browser_goto";
597
- readonly SNAPSHOT: "browser_snapshot";
598
- readonly CLICK: "browser_click";
599
- readonly TYPE: "browser_type";
600
- readonly PRESS: "browser_press";
601
- readonly SELECT: "browser_select";
602
- readonly SCROLL: "browser_scroll";
603
- readonly CLOSE: "browser_close";
604
- readonly HOVER: "browser_hover";
605
- readonly BACK: "browser_back";
606
- readonly DIALOG: "browser_dialog";
607
- readonly WAIT: "browser_wait";
608
- readonly TABS: "browser_tabs";
609
- readonly DRAG: "browser_drag";
610
- readonly EVALUATE: "browser_evaluate";
611
- };
612
- type BrowserToolName = (typeof BROWSER_TOOLS)[keyof typeof BROWSER_TOOLS];
613
-
614
642
  /**
615
643
  * AgentBrowser Tools
616
644
  *
package/dist/index.d.ts CHANGED
@@ -170,6 +170,13 @@ declare const dragInputSchema: z.ZodObject<{
170
170
  targetSelector: z.ZodOptional<z.ZodString>;
171
171
  }, z.core.$strip>;
172
172
  type DragInput = z.output<typeof dragInputSchema>;
173
+ /**
174
+ * browser_screenshot - Capture a screenshot of the current page
175
+ */
176
+ declare const screenshotInputSchema: z.ZodObject<{
177
+ fullPage: z.ZodOptional<z.ZodBoolean>;
178
+ }, z.core.$strip>;
179
+ type ScreenshotInput = z.output<typeof screenshotInputSchema>;
173
180
  /**
174
181
  * browser_evaluate - Execute JavaScript in the browser
175
182
  */
@@ -277,12 +284,38 @@ declare const browserSchemas: {
277
284
  sourceSelector: z.ZodOptional<z.ZodString>;
278
285
  targetSelector: z.ZodOptional<z.ZodString>;
279
286
  }, z.core.$strip>;
287
+ readonly screenshot: z.ZodObject<{
288
+ fullPage: z.ZodOptional<z.ZodBoolean>;
289
+ }, z.core.$strip>;
280
290
  readonly evaluate: z.ZodObject<{
281
291
  script: z.ZodString;
282
292
  arg: z.ZodOptional<z.ZodUnknown>;
283
293
  }, z.core.$strip>;
284
294
  };
285
295
 
296
+ /**
297
+ * Browser Tool Constants
298
+ */
299
+ declare const BROWSER_TOOLS: {
300
+ readonly GOTO: "browser_goto";
301
+ readonly SNAPSHOT: "browser_snapshot";
302
+ readonly CLICK: "browser_click";
303
+ readonly TYPE: "browser_type";
304
+ readonly PRESS: "browser_press";
305
+ readonly SELECT: "browser_select";
306
+ readonly SCROLL: "browser_scroll";
307
+ readonly CLOSE: "browser_close";
308
+ readonly HOVER: "browser_hover";
309
+ readonly BACK: "browser_back";
310
+ readonly DIALOG: "browser_dialog";
311
+ readonly WAIT: "browser_wait";
312
+ readonly TABS: "browser_tabs";
313
+ readonly DRAG: "browser_drag";
314
+ readonly SCREENSHOT: "browser_screenshot";
315
+ readonly EVALUATE: "browser_evaluate";
316
+ };
317
+ type BrowserToolName = (typeof BROWSER_TOOLS)[keyof typeof BROWSER_TOOLS];
318
+
286
319
  /**
287
320
  * AgentBrowser-specific configuration extensions.
288
321
  */
@@ -300,6 +333,17 @@ interface AgentBrowserConfigExtensions {
300
333
  * ```
301
334
  */
302
335
  storageState?: string;
336
+ /**
337
+ * Tool names to exclude from the browser toolset.
338
+ * Use this to disable specific tools, e.g. `['browser_screenshot']`
339
+ * to skip the screenshot tool for models that don't support vision.
340
+ *
341
+ * @example
342
+ * ```ts
343
+ * new AgentBrowser({ excludeTools: ['browser_screenshot'] })
344
+ * ```
345
+ */
346
+ excludeTools?: BrowserToolName[];
303
347
  }
304
348
  /**
305
349
  * Configuration options for AgentBrowser.
@@ -387,6 +431,7 @@ declare class AgentBrowser extends MastraBrowser {
387
431
  private pidLookups;
388
432
  /** Thread manager - narrowed type from base class */
389
433
  protected threadManager: AgentBrowserThreadManager;
434
+ private browserConfig;
390
435
  constructor(config?: BrowserConfig);
391
436
  /**
392
437
  * Ensure browser is ready and thread session exists.
@@ -416,7 +461,7 @@ declare class AgentBrowser extends MastraBrowser {
416
461
  protected checkBrowserAlive(): Promise<boolean>;
417
462
  /**
418
463
  * Get the browser tools for this provider.
419
- * Returns 17 flat tools for browser automation.
464
+ * Returns 16 flat tools for browser automation.
420
465
  */
421
466
  getTools(): Record<string, Tool<any, any>>;
422
467
  /**
@@ -497,6 +542,11 @@ declare class AgentBrowser extends MastraBrowser {
497
542
  scroll: string;
498
543
  hint?: string;
499
544
  } | BrowserToolError>;
545
+ screenshot(input: ScreenshotInput, threadId?: string): Promise<{
546
+ base64: string;
547
+ url: string;
548
+ title: string;
549
+ } | BrowserToolError>;
500
550
  click(input: ClickInput, threadId?: string): Promise<{
501
551
  success: true;
502
552
  url: string;
@@ -589,28 +639,6 @@ declare class AgentBrowser extends MastraBrowser {
589
639
  */
590
640
  declare function getBrowserPid(manager: BrowserManager): Promise<number | undefined>;
591
641
 
592
- /**
593
- * Browser Tool Constants
594
- */
595
- declare const BROWSER_TOOLS: {
596
- readonly GOTO: "browser_goto";
597
- readonly SNAPSHOT: "browser_snapshot";
598
- readonly CLICK: "browser_click";
599
- readonly TYPE: "browser_type";
600
- readonly PRESS: "browser_press";
601
- readonly SELECT: "browser_select";
602
- readonly SCROLL: "browser_scroll";
603
- readonly CLOSE: "browser_close";
604
- readonly HOVER: "browser_hover";
605
- readonly BACK: "browser_back";
606
- readonly DIALOG: "browser_dialog";
607
- readonly WAIT: "browser_wait";
608
- readonly TABS: "browser_tabs";
609
- readonly DRAG: "browser_drag";
610
- readonly EVALUATE: "browser_evaluate";
611
- };
612
- type BrowserToolName = (typeof BROWSER_TOOLS)[keyof typeof BROWSER_TOOLS];
613
-
614
642
  /**
615
643
  * AgentBrowser Tools
616
644
  *
package/dist/index.js CHANGED
@@ -218,6 +218,9 @@ var dragInputSchema = z.object({
218
218
  });
219
219
  }
220
220
  });
221
+ var screenshotInputSchema = z.object({
222
+ fullPage: z.boolean().optional().describe("Capture the full scrollable page instead of just the viewport (default: false)")
223
+ });
221
224
  var evaluateInputSchema = z.object({
222
225
  script: z.string().describe(
223
226
  "JavaScript expression to evaluate in the browser and return the result. Do not use `return` \u2014 write a bare expression like `document.title` or `1 + 1`. For async code, wrap in an async IIFE: `(async () => { ... })()`."
@@ -241,6 +244,8 @@ var browserSchemas = {
241
244
  wait: waitInputSchema,
242
245
  tabs: tabsInputSchema,
243
246
  drag: dragInputSchema,
247
+ // Utility
248
+ screenshot: screenshotInputSchema,
244
249
  // Escape hatch
245
250
  evaluate: evaluateInputSchema
246
251
  };
@@ -263,6 +268,8 @@ var BROWSER_TOOLS = {
263
268
  WAIT: "browser_wait",
264
269
  TABS: "browser_tabs",
265
270
  DRAG: "browser_drag",
271
+ // Utility
272
+ SCREENSHOT: "browser_screenshot",
266
273
  // Escape hatch
267
274
  EVALUATE: "browser_evaluate"
268
275
  };
@@ -391,6 +398,38 @@ function createPressTool(browser) {
391
398
  }
392
399
  });
393
400
  }
401
+ function createScreenshotTool(browser) {
402
+ return createTool({
403
+ id: BROWSER_TOOLS.SCREENSHOT,
404
+ description: "Capture a screenshot of the current viewport as a visible PNG (set fullPage: true for full-page capture). Use snapshot when you only need text or interactive elements \u2014 screenshots are expensive. Use this when you need to visually inspect the page, e.g. evaluating images, product photos, layout, design, or colors.",
405
+ inputSchema: screenshotInputSchema,
406
+ execute: async (input, { agent }) => {
407
+ const threadId = agent?.threadId;
408
+ browser.setCurrentThread(threadId);
409
+ await browser.ensureReady();
410
+ return await browser.screenshot(input, threadId);
411
+ },
412
+ toModelOutput(output) {
413
+ const result = output;
414
+ if (typeof result.base64 !== "string") {
415
+ return {
416
+ type: "content",
417
+ value: [{ type: "text", text: result.message ?? "Failed to capture screenshot." }]
418
+ };
419
+ }
420
+ return {
421
+ type: "content",
422
+ value: [
423
+ {
424
+ type: "media",
425
+ mediaType: "image/png",
426
+ data: result.base64
427
+ }
428
+ ]
429
+ };
430
+ }
431
+ });
432
+ }
394
433
  function createScrollTool(browser) {
395
434
  return createTool({
396
435
  id: BROWSER_TOOLS.SCROLL,
@@ -482,6 +521,8 @@ function createAgentBrowserTools(browser) {
482
521
  [BROWSER_TOOLS.SELECT]: createSelectTool(browser),
483
522
  [BROWSER_TOOLS.SCROLL]: createScrollTool(browser),
484
523
  [BROWSER_TOOLS.CLOSE]: createCloseTool(browser),
524
+ // Utility
525
+ [BROWSER_TOOLS.SCREENSHOT]: createScreenshotTool(browser),
485
526
  // Extended
486
527
  [BROWSER_TOOLS.HOVER]: createHoverTool(browser),
487
528
  [BROWSER_TOOLS.BACK]: createBackTool(browser),
@@ -524,8 +565,10 @@ var AgentBrowser = class extends MastraBrowser {
524
565
  defaultTimeout = 3e4;
525
566
  /** Pending PID lookups — awaited in disconnect handlers to avoid racing. */
526
567
  pidLookups = /* @__PURE__ */ new Set();
568
+ browserConfig;
527
569
  constructor(config = {}) {
528
570
  super(config);
571
+ this.browserConfig = config;
529
572
  this.id = `agent-browser-${Date.now()}`;
530
573
  if (config.timeout) {
531
574
  this.defaultTimeout = config.timeout;
@@ -688,10 +731,17 @@ var AgentBrowser = class extends MastraBrowser {
688
731
  // ---------------------------------------------------------------------------
689
732
  /**
690
733
  * Get the browser tools for this provider.
691
- * Returns 17 flat tools for browser automation.
734
+ * Returns 16 flat tools for browser automation.
692
735
  */
693
736
  getTools() {
694
- return createAgentBrowserTools(this);
737
+ const tools = createAgentBrowserTools(this);
738
+ const exclude = this.browserConfig.excludeTools;
739
+ if (exclude?.length) {
740
+ for (const name of exclude) {
741
+ delete tools[name];
742
+ }
743
+ }
744
+ return tools;
695
745
  }
696
746
  // ---------------------------------------------------------------------------
697
747
  // Helpers
@@ -1004,6 +1054,26 @@ var AgentBrowser = class extends MastraBrowser {
1004
1054
  }
1005
1055
  }
1006
1056
  // ---------------------------------------------------------------------------
1057
+ // browser_screenshot - Capture a screenshot of the current page
1058
+ // ---------------------------------------------------------------------------
1059
+ async screenshot(input, threadId) {
1060
+ try {
1061
+ const page = await this.getPage(threadId);
1062
+ const buffer = await page.screenshot({
1063
+ fullPage: input.fullPage ?? false,
1064
+ type: "png"
1065
+ });
1066
+ const base64 = Buffer.from(buffer).toString("base64");
1067
+ return {
1068
+ base64,
1069
+ url: page.url(),
1070
+ title: await page.title()
1071
+ };
1072
+ } catch (error) {
1073
+ return this.createErrorFromException(error, "Screenshot");
1074
+ }
1075
+ }
1076
+ // ---------------------------------------------------------------------------
1007
1077
  // 3. browser_click - Click on element
1008
1078
  // ---------------------------------------------------------------------------
1009
1079
  async click(input, threadId) {