@poncho-ai/browser 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/browser@0.4.0 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
2
+ > @poncho-ai/browser@0.6.0 build /Users/cesar/Dev/latitude/poncho-ai/packages/browser
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -7,8 +7,8 @@
7
7
  CLI tsup v8.5.1
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
- ESM dist/index.js 34.91 KB
11
- ESM ⚡️ Build success in 62ms
10
+ ESM dist/index.js 40.85 KB
11
+ ESM ⚡️ Build success in 76ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 4777ms
14
- DTS dist/index.d.ts 12.44 KB
13
+ DTS ⚡️ Build success in 1346ms
14
+ DTS dist/index.d.ts 13.55 KB
@@ -0,0 +1,12 @@
1
+
2
+ > @poncho-ai/browser@0.3.0 test /Users/cesar/Dev/latitude/poncho-ai/packages/browser
3
+ > vitest --passWithNoTests
4
+
5
+
6
+  RUN  v1.6.1 /Users/cesar/Dev/latitude/poncho-ai/packages/browser
7
+
8
+ include: **/*.{test,spec}.?(c|m)[jt]s?(x)
9
+ exclude: **/node_modules/**, **/dist/**, **/cypress/**, **/.{idea,git,cache,output,temp}/**, **/{karma,rollup,webpack,vite,vitest,jest,ava,babel,nyc,cypress,tsup,build,eslint,prettier}.config.*
10
+ watch exclude: **/node_modules/**, **/dist/**
11
+ No test files found, exiting with code 0
12
+
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # @poncho-ai/browser
2
2
 
3
+ ## 0.6.0
4
+
5
+ ### Minor Changes
6
+
7
+ - [`76294e9`](https://github.com/cesr/poncho-ai/commit/76294e95035bf3abbb19c28871a33f82351c49ec) Thanks [@cesr](https://github.com/cesr)! - Support remote and serverless browser deployments.
8
+
9
+ **@poncho-ai/browser**: Add `provider` and `cdpUrl` config options for cloud browser services (Browserbase, Browser Use, Kernel) and direct CDP connections. Auto-detect `@sparticuz/chromium` on serverless platforms (Vercel, Lambda) and default the profile directory to `/tmp`.
10
+
11
+ **@poncho-ai/cli**: Generate @vercel/nft trace hints for `@poncho-ai/browser` and `@sparticuz/chromium` in the Vercel entry point so dynamically-loaded browser packages are bundled into the serverless function.
12
+
13
+ ## 0.5.0
14
+
15
+ ### Minor Changes
16
+
17
+ - [`540c8e6`](https://github.com/cesr/poncho-ai/commit/540c8e6d895a95c2f215deb4af219069543371d9) Thanks [@cesr](https://github.com/cesr)! - Add `browser_click_text` and `browser_execute_js` tools for interacting with elements that don't appear in the accessibility snapshot (e.g. styled divs acting as buttons). Also force new-tab navigations (`window.open`, `target="_blank"`) to stay in the current tab so agents don't lose context.
18
+
3
19
  ## 0.4.0
4
20
 
5
21
  ### Minor Changes
package/dist/index.d.ts CHANGED
@@ -64,6 +64,13 @@ interface BrowserConfig {
64
64
  * user-agent, and passes anti-automation Chrome flags. */
65
65
  stealth?: boolean;
66
66
  storagePersistence?: BrowserStoragePersistence;
67
+ /** Cloud browser provider. Requires the provider's API key env var to be set
68
+ * (e.g. `BROWSERBASE_API_KEY` + `BROWSERBASE_PROJECT_ID` for Browserbase).
69
+ * When set, the browser runs remotely instead of launching a local Chromium. */
70
+ provider?: "browserbase" | "browseruse" | "kernel";
71
+ /** Connect to an existing browser via Chrome DevTools Protocol URL or port.
72
+ * Mutually exclusive with `provider`; if both are set, `cdpUrl` takes precedence. */
73
+ cdpUrl?: string;
67
74
  }
68
75
 
69
76
  type FrameListener = (frame: BrowserFrame) => void;
@@ -90,11 +97,24 @@ declare class BrowserSession {
90
97
  * Only needs to be called once per browser launch.
91
98
  */
92
99
  private installContextStealth;
100
+ /**
101
+ * Force all new-tab navigations (window.open, target="_blank") to open
102
+ * in the current tab instead. Agents operate on a single tab at a time
103
+ * and can't see or interact with popups.
104
+ */
105
+ private installSameTabScript;
93
106
  /**
94
107
  * Override the user-agent via CDP on the current page target.
95
108
  * CDP Network.setUserAgentOverride is per-target, so call per-tab.
96
109
  */
97
110
  private overrideUserAgentOnPage;
111
+ private get isRemote();
112
+ private get isServerless();
113
+ /**
114
+ * Resolve executablePath for local launches. When no explicit path is set
115
+ * and we're on a serverless platform, try `@sparticuz/chromium` automatically.
116
+ */
117
+ private resolveExecutablePath;
98
118
  private launchFreshManager;
99
119
  private ensureManager;
100
120
  private evictOldestTab;
@@ -122,6 +142,8 @@ declare class BrowserSession {
122
142
  title: string;
123
143
  }>;
124
144
  scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void>;
145
+ clickText(conversationId: string, text: string, exact?: boolean): Promise<void>;
146
+ executeJs(conversationId: string, script: string): Promise<unknown>;
125
147
  closeTab(conversationId: string): Promise<void>;
126
148
  navigate(conversationId: string, action: string): Promise<void>;
127
149
  startScreencast(conversationId: string, options?: ScreencastOptions): Promise<void>;
package/dist/index.js CHANGED
@@ -186,6 +186,46 @@ async function getBrowserManagerCtor() {
186
186
  return BrowserManagerCtor;
187
187
  }
188
188
  var MAX_TABS = 8;
189
+ var SAME_TAB_INIT_SCRIPT = `
190
+ (() => {
191
+ // Override window.open to navigate in-place
192
+ try {
193
+ const origOpen = window.open;
194
+ window.open = function(url, target, features) {
195
+ if (url) {
196
+ location.href = url;
197
+ return window;
198
+ }
199
+ return origOpen.call(this, url, target, features);
200
+ };
201
+ } catch {}
202
+
203
+ // Rewrite target="_blank" on existing and future links
204
+ try {
205
+ const rewrite = (el) => {
206
+ if (el.tagName === 'A' && el.target === '_blank') {
207
+ el.target = '_self';
208
+ }
209
+ };
210
+ // Catch links already in the DOM
211
+ document.addEventListener('DOMContentLoaded', () => {
212
+ document.querySelectorAll('a[target="_blank"]').forEach(rewrite);
213
+ });
214
+ // Catch dynamically added links
215
+ new MutationObserver((mutations) => {
216
+ for (const m of mutations) {
217
+ for (const node of m.addedNodes) {
218
+ if (node.nodeType !== 1) continue;
219
+ rewrite(node);
220
+ if (node.querySelectorAll) {
221
+ node.querySelectorAll('a[target="_blank"]').forEach(rewrite);
222
+ }
223
+ }
224
+ }
225
+ }).observe(document.documentElement, { childList: true, subtree: true });
226
+ } catch {}
227
+ })();
228
+ `;
189
229
  var BrowserSession = class {
190
230
  config;
191
231
  sessionId;
@@ -264,6 +304,20 @@ var BrowserSession = class {
264
304
  console.warn("[poncho][browser] Failed to install stealth init script:", err?.message ?? err);
265
305
  }
266
306
  }
307
+ /**
308
+ * Force all new-tab navigations (window.open, target="_blank") to open
309
+ * in the current tab instead. Agents operate on a single tab at a time
310
+ * and can't see or interact with popups.
311
+ */
312
+ async installSameTabScript(mgr) {
313
+ const ctx = mgr.getContext();
314
+ if (!ctx) return;
315
+ try {
316
+ await ctx.addInitScript({ content: SAME_TAB_INIT_SCRIPT });
317
+ } catch (err) {
318
+ console.warn("[poncho][browser] Failed to install same-tab init script:", err?.message ?? err);
319
+ }
320
+ }
267
321
  /**
268
322
  * Override the user-agent via CDP on the current page target.
269
323
  * CDP Network.setUserAgentOverride is per-target, so call per-tab.
@@ -284,22 +338,61 @@ var BrowserSession = class {
284
338
  console.warn("[poncho][browser] Failed to override UA via CDP:", err?.message ?? err);
285
339
  }
286
340
  }
341
+ get isRemote() {
342
+ return !!(this.config.provider || this.config.cdpUrl);
343
+ }
344
+ get isServerless() {
345
+ return !!(process.env.VERCEL || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.AWS_EXECUTION_ENV || process.env.SERVERLESS);
346
+ }
347
+ /**
348
+ * Resolve executablePath for local launches. When no explicit path is set
349
+ * and we're on a serverless platform, try `@sparticuz/chromium` automatically.
350
+ */
351
+ async resolveExecutablePath() {
352
+ if (this.config.executablePath) return this.config.executablePath;
353
+ if (!this.isServerless) return void 0;
354
+ try {
355
+ const spec = ["@sparticuz", "chromium"].join("/");
356
+ const mod = await import(
357
+ /* webpackIgnore: true */
358
+ spec
359
+ );
360
+ const chromium = mod.default ?? mod;
361
+ const path = await chromium.executablePath();
362
+ console.log(`[poncho][browser] Auto-detected @sparticuz/chromium: ${path}`);
363
+ return path;
364
+ } catch {
365
+ return void 0;
366
+ }
367
+ }
287
368
  async launchFreshManager() {
288
369
  const Ctor = await getBrowserManagerCtor();
289
370
  const mgr = new Ctor();
290
371
  const viewport = this.config.viewport ?? { width: 1280, height: 720 };
291
- await mkdir(this.profileDir, { recursive: true });
372
+ const executablePath = await this.resolveExecutablePath();
292
373
  const launchOpts = {
293
374
  action: "launch",
294
375
  headless: this.config.headless ?? true,
295
376
  viewport: { width: viewport.width ?? 1280, height: viewport.height ?? 720 },
296
- executablePath: this.config.executablePath,
297
- profile: this.profileDir
377
+ executablePath
298
378
  };
379
+ if (this.config.cdpUrl) {
380
+ launchOpts.cdpUrl = this.config.cdpUrl;
381
+ console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
382
+ } else if (this.config.provider) {
383
+ launchOpts.provider = this.config.provider;
384
+ console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
385
+ } else {
386
+ const profileDir = this.isServerless && !this.config.profileDir ? join(tmpdir(), "poncho-browser", this.sessionId) : this.profileDir;
387
+ await mkdir(profileDir, { recursive: true });
388
+ launchOpts.profile = profileDir;
389
+ }
299
390
  if (this.stealthEnabled) {
300
391
  const ua = this.stealthUserAgent;
301
392
  launchOpts.userAgent = ua;
302
- launchOpts.args = buildStealthArgs(ua);
393
+ if (!this.isRemote) {
394
+ launchOpts.args = buildStealthArgs(ua);
395
+ }
303
396
  console.log("[poncho][browser] Launching with stealth mode enabled (UA: " + ua + ")");
304
397
  } else if (this.config.userAgent) {
305
398
  launchOpts.userAgent = this.config.userAgent;
@@ -310,6 +403,7 @@ var BrowserSession = class {
310
403
  if (this.stealthEnabled) {
311
404
  await this.installContextStealth(mgr);
312
405
  }
406
+ await this.installSameTabScript(mgr);
313
407
  try {
314
408
  const cdp = await mgr.getCDPSession();
315
409
  await cdp.send("Debugger.disable");
@@ -558,6 +652,30 @@ var BrowserSession = class {
558
652
  this.unlock();
559
653
  }
560
654
  }
655
+ async clickText(conversationId, text, exact) {
656
+ await this.lock();
657
+ try {
658
+ const mgr = await this.ensureManager();
659
+ const tab = await this.switchToConversation(mgr, conversationId);
660
+ const selector = exact ? `text="${text}"` : `text=${text}`;
661
+ const locator = mgr.getLocator(selector);
662
+ await locator.click();
663
+ tab.url = mgr.getPage().url();
664
+ } finally {
665
+ this.unlock();
666
+ }
667
+ }
668
+ async executeJs(conversationId, script) {
669
+ await this.lock();
670
+ try {
671
+ const mgr = await this.ensureManager();
672
+ await this.switchToConversation(mgr, conversationId);
673
+ const page = mgr.getPage();
674
+ return await page.evaluate(script);
675
+ } finally {
676
+ this.unlock();
677
+ }
678
+ }
561
679
  async closeTab(conversationId) {
562
680
  await this.lock();
563
681
  try {
@@ -907,6 +1025,53 @@ function createBrowserTools(getSession, getConversationId) {
907
1025
  return { clicked: ref };
908
1026
  }
909
1027
  },
1028
+ {
1029
+ name: "browser_click_text",
1030
+ description: "Click the first visible element on the page that contains the given text. Use this when an element doesn't appear in the snapshot \u2014 e.g. styled divs acting as buttons. By default matches substring (case-insensitive); set exact=true for exact text match.",
1031
+ inputSchema: {
1032
+ type: "object",
1033
+ properties: {
1034
+ text: {
1035
+ type: "string",
1036
+ description: "The visible text of the element to click"
1037
+ },
1038
+ exact: {
1039
+ type: "boolean",
1040
+ description: "If true, match the exact full text (case-sensitive). Default: false (substring, case-insensitive)."
1041
+ }
1042
+ },
1043
+ required: ["text"]
1044
+ },
1045
+ handler: async (input) => {
1046
+ const session = getSession();
1047
+ const text = String(input.text ?? "");
1048
+ if (!text) throw new Error("text is required");
1049
+ const exact = input.exact === true;
1050
+ await session.clickText(getConversationId(), text, exact);
1051
+ return { clicked: text, exact };
1052
+ }
1053
+ },
1054
+ {
1055
+ name: "browser_execute_js",
1056
+ description: "Execute JavaScript in the current page context and return the result. Use this to inspect or interact with the DOM when snapshot refs aren't available \u2014 e.g. finding elements by text content, getting bounding boxes, or clicking elements by selector. The script is evaluated via page.evaluate(); return a value to get it back.",
1057
+ inputSchema: {
1058
+ type: "object",
1059
+ properties: {
1060
+ script: {
1061
+ type: "string",
1062
+ description: "JavaScript code to evaluate in the page. Use a return statement or expression to get a result back."
1063
+ }
1064
+ },
1065
+ required: ["script"]
1066
+ },
1067
+ handler: async (input) => {
1068
+ const session = getSession();
1069
+ const script = String(input.script ?? "");
1070
+ if (!script) throw new Error("script is required");
1071
+ const result = await session.executeJs(getConversationId(), script);
1072
+ return { result: result ?? null };
1073
+ }
1074
+ },
910
1075
  {
911
1076
  name: "browser_type",
912
1077
  description: "Type text into a form field identified by its ref from the last snapshot. This clears the field first, then types the new value.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/browser",
3
- "version": "0.4.0",
3
+ "version": "0.6.0",
4
4
  "description": "Browser automation for Poncho agents, powered by agent-browser",
5
5
  "repository": {
6
6
  "type": "git",
package/src/session.ts CHANGED
@@ -62,6 +62,51 @@ async function getBrowserManagerCtor(): Promise<new () => BrowserManagerInstance
62
62
 
63
63
  const MAX_TABS = 8;
64
64
 
65
+ /**
66
+ * Init script that forces new-tab navigations (window.open, target="_blank")
67
+ * to open in the current tab. Runs before page scripts on every navigation.
68
+ */
69
+ const SAME_TAB_INIT_SCRIPT = `
70
+ (() => {
71
+ // Override window.open to navigate in-place
72
+ try {
73
+ const origOpen = window.open;
74
+ window.open = function(url, target, features) {
75
+ if (url) {
76
+ location.href = url;
77
+ return window;
78
+ }
79
+ return origOpen.call(this, url, target, features);
80
+ };
81
+ } catch {}
82
+
83
+ // Rewrite target="_blank" on existing and future links
84
+ try {
85
+ const rewrite = (el) => {
86
+ if (el.tagName === 'A' && el.target === '_blank') {
87
+ el.target = '_self';
88
+ }
89
+ };
90
+ // Catch links already in the DOM
91
+ document.addEventListener('DOMContentLoaded', () => {
92
+ document.querySelectorAll('a[target="_blank"]').forEach(rewrite);
93
+ });
94
+ // Catch dynamically added links
95
+ new MutationObserver((mutations) => {
96
+ for (const m of mutations) {
97
+ for (const node of m.addedNodes) {
98
+ if (node.nodeType !== 1) continue;
99
+ rewrite(node);
100
+ if (node.querySelectorAll) {
101
+ node.querySelectorAll('a[target="_blank"]').forEach(rewrite);
102
+ }
103
+ }
104
+ }
105
+ }).observe(document.documentElement, { childList: true, subtree: true });
106
+ } catch {}
107
+ })();
108
+ `;
109
+
65
110
  // Per-conversation tab state
66
111
  interface ConversationTab {
67
112
  tabIndex: number;
@@ -163,6 +208,21 @@ export class BrowserSession {
163
208
  }
164
209
  }
165
210
 
211
+ /**
212
+ * Force all new-tab navigations (window.open, target="_blank") to open
213
+ * in the current tab instead. Agents operate on a single tab at a time
214
+ * and can't see or interact with popups.
215
+ */
216
+ private async installSameTabScript(mgr: BrowserManagerInstance): Promise<void> {
217
+ const ctx = mgr.getContext();
218
+ if (!ctx) return;
219
+ try {
220
+ await ctx.addInitScript({ content: SAME_TAB_INIT_SCRIPT });
221
+ } catch (err) {
222
+ console.warn("[poncho][browser] Failed to install same-tab init script:", (err as Error)?.message ?? err);
223
+ }
224
+ }
225
+
166
226
  /**
167
227
  * Override the user-agent via CDP on the current page target.
168
228
  * CDP Network.setUserAgentOverride is per-target, so call per-tab.
@@ -184,25 +244,74 @@ export class BrowserSession {
184
244
  }
185
245
  }
186
246
 
247
+ private get isRemote(): boolean {
248
+ return !!(this.config.provider || this.config.cdpUrl);
249
+ }
250
+
251
+ private get isServerless(): boolean {
252
+ return !!(
253
+ process.env.VERCEL ||
254
+ process.env.AWS_LAMBDA_FUNCTION_NAME ||
255
+ process.env.AWS_EXECUTION_ENV ||
256
+ process.env.SERVERLESS
257
+ );
258
+ }
259
+
260
+ /**
261
+ * Resolve executablePath for local launches. When no explicit path is set
262
+ * and we're on a serverless platform, try `@sparticuz/chromium` automatically.
263
+ */
264
+ private async resolveExecutablePath(): Promise<string | undefined> {
265
+ if (this.config.executablePath) return this.config.executablePath;
266
+ if (!this.isServerless) return undefined;
267
+ try {
268
+ // Dynamic import — @sparticuz/chromium is an optional peer dependency
269
+ // that the user installs in their agent project for serverless runtimes.
270
+ const spec = ["@sparticuz", "chromium"].join("/");
271
+ const mod = await import(/* webpackIgnore: true */ spec);
272
+ const chromium = mod.default ?? mod;
273
+ const path = await chromium.executablePath();
274
+ console.log(`[poncho][browser] Auto-detected @sparticuz/chromium: ${path}`);
275
+ return path;
276
+ } catch {
277
+ return undefined;
278
+ }
279
+ }
280
+
187
281
  private async launchFreshManager(): Promise<BrowserManagerInstance> {
188
282
  const Ctor = await getBrowserManagerCtor();
189
283
  const mgr = new Ctor();
190
284
 
191
285
  const viewport = this.config.viewport ?? { width: 1280, height: 720 };
192
- await mkdir(this.profileDir, { recursive: true });
286
+ const executablePath = await this.resolveExecutablePath();
193
287
 
194
288
  const launchOpts: Record<string, unknown> = {
195
289
  action: "launch",
196
290
  headless: this.config.headless ?? true,
197
291
  viewport: { width: viewport.width ?? 1280, height: viewport.height ?? 720 },
198
- executablePath: this.config.executablePath,
199
- profile: this.profileDir,
292
+ executablePath,
200
293
  };
201
294
 
295
+ if (this.config.cdpUrl) {
296
+ launchOpts.cdpUrl = this.config.cdpUrl;
297
+ console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
298
+ } else if (this.config.provider) {
299
+ launchOpts.provider = this.config.provider;
300
+ console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
301
+ } else {
302
+ const profileDir = this.isServerless && !this.config.profileDir
303
+ ? join(tmpdir(), "poncho-browser", this.sessionId)
304
+ : this.profileDir;
305
+ await mkdir(profileDir, { recursive: true });
306
+ launchOpts.profile = profileDir;
307
+ }
308
+
202
309
  if (this.stealthEnabled) {
203
310
  const ua = this.stealthUserAgent!;
204
311
  launchOpts.userAgent = ua;
205
- launchOpts.args = buildStealthArgs(ua);
312
+ if (!this.isRemote) {
313
+ launchOpts.args = buildStealthArgs(ua);
314
+ }
206
315
  console.log("[poncho][browser] Launching with stealth mode enabled (UA: " + ua + ")");
207
316
  } else if (this.config.userAgent) {
208
317
  launchOpts.userAgent = this.config.userAgent;
@@ -219,6 +328,9 @@ export class BrowserSession {
219
328
  await this.installContextStealth(mgr);
220
329
  }
221
330
 
331
+ // Redirect new-tab navigations into the current tab
332
+ await this.installSameTabScript(mgr);
333
+
222
334
  try {
223
335
  const cdp = await mgr.getCDPSession();
224
336
  await cdp.send("Debugger.disable");
@@ -475,6 +587,32 @@ export class BrowserSession {
475
587
  }
476
588
  }
477
589
 
590
+ async clickText(conversationId: string, text: string, exact?: boolean): Promise<void> {
591
+ await this.lock();
592
+ try {
593
+ const mgr = await this.ensureManager();
594
+ const tab = await this.switchToConversation(mgr, conversationId);
595
+ const selector = exact ? `text="${text}"` : `text=${text}`;
596
+ const locator = mgr.getLocator(selector);
597
+ await locator.click();
598
+ tab.url = mgr.getPage().url();
599
+ } finally {
600
+ this.unlock();
601
+ }
602
+ }
603
+
604
+ async executeJs(conversationId: string, script: string): Promise<unknown> {
605
+ await this.lock();
606
+ try {
607
+ const mgr = await this.ensureManager();
608
+ await this.switchToConversation(mgr, conversationId);
609
+ const page = mgr.getPage();
610
+ return await page.evaluate(script);
611
+ } finally {
612
+ this.unlock();
613
+ }
614
+ }
615
+
478
616
  async closeTab(conversationId: string): Promise<void> {
479
617
  await this.lock();
480
618
  try {
package/src/tools.ts CHANGED
@@ -73,6 +73,62 @@ export function createBrowserTools(
73
73
  return { clicked: ref };
74
74
  },
75
75
  },
76
+ {
77
+ name: "browser_click_text",
78
+ description:
79
+ "Click the first visible element on the page that contains the given text. " +
80
+ "Use this when an element doesn't appear in the snapshot — e.g. styled divs acting as buttons. " +
81
+ "By default matches substring (case-insensitive); set exact=true for exact text match.",
82
+ inputSchema: {
83
+ type: "object",
84
+ properties: {
85
+ text: {
86
+ type: "string",
87
+ description: "The visible text of the element to click",
88
+ },
89
+ exact: {
90
+ type: "boolean",
91
+ description:
92
+ "If true, match the exact full text (case-sensitive). Default: false (substring, case-insensitive).",
93
+ },
94
+ },
95
+ required: ["text"],
96
+ },
97
+ handler: async (input: BrowserToolInput) => {
98
+ const session = getSession();
99
+ const text = String(input.text ?? "");
100
+ if (!text) throw new Error("text is required");
101
+ const exact = input.exact === true;
102
+ await session.clickText(getConversationId(), text, exact);
103
+ return { clicked: text, exact };
104
+ },
105
+ },
106
+ {
107
+ name: "browser_execute_js",
108
+ description:
109
+ "Execute JavaScript in the current page context and return the result. " +
110
+ "Use this to inspect or interact with the DOM when snapshot refs aren't available — " +
111
+ "e.g. finding elements by text content, getting bounding boxes, or clicking elements by selector. " +
112
+ "The script is evaluated via page.evaluate(); return a value to get it back.",
113
+ inputSchema: {
114
+ type: "object",
115
+ properties: {
116
+ script: {
117
+ type: "string",
118
+ description:
119
+ "JavaScript code to evaluate in the page. Use a return statement or expression to get a result back.",
120
+ },
121
+ },
122
+ required: ["script"],
123
+ },
124
+ handler: async (input: BrowserToolInput) => {
125
+ const session = getSession();
126
+ const script = String(input.script ?? "");
127
+ if (!script) throw new Error("script is required");
128
+ const result = await session.executeJs(getConversationId(), script);
129
+ return { result: result ?? null };
130
+ },
131
+ },
76
132
  {
77
133
  name: "browser_type",
78
134
  description:
package/src/types.ts CHANGED
@@ -70,4 +70,11 @@ export interface BrowserConfig {
70
70
  * user-agent, and passes anti-automation Chrome flags. */
71
71
  stealth?: boolean;
72
72
  storagePersistence?: BrowserStoragePersistence;
73
+ /** Cloud browser provider. Requires the provider's API key env var to be set
74
+ * (e.g. `BROWSERBASE_API_KEY` + `BROWSERBASE_PROJECT_ID` for Browserbase).
75
+ * When set, the browser runs remotely instead of launching a local Chromium. */
76
+ provider?: "browserbase" | "browseruse" | "kernel";
77
+ /** Connect to an existing browser via Chrome DevTools Protocol URL or port.
78
+ * Mutually exclusive with `provider`; if both are set, `cdpUrl` takes precedence. */
79
+ cdpUrl?: string;
73
80
  }