@poncho-ai/browser 0.6.26 → 0.7.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/browser@0.6.26 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
2
+ > @poncho-ai/browser@0.7.1 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -7,8 +7,8 @@
7
7
  CLI tsup v8.5.1
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
- ESM dist/index.js 47.98 KB
11
- ESM ⚡️ Build success in 60ms
10
+ ESM dist/index.js 57.26 KB
11
+ ESM ⚡️ Build success in 77ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 4894ms
14
- DTS dist/index.d.ts 13.77 KB
13
+ DTS ⚡️ Build success in 5467ms
14
+ DTS dist/index.d.ts 15.75 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,38 @@
1
1
  # @poncho-ai/browser
2
2
 
3
+ ## 0.7.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [#188](https://github.com/cesr/poncho-ai/pull/188) [`97772cc`](https://github.com/cesr/poncho-ai/commit/97772ccf2c07ec3a3f3350ef3a65596fba91a154) Thanks [@cesr](https://github.com/cesr)! - Add residential-proxy support for Browserbase sessions so IP-reputation walls
8
+ (Reddit, LinkedIn, Instagram, …) stop returning 403 "blocked by network
9
+ security". Datacenter IPs are blocked before any fingerprint check, so stealth
10
+ alone can't get past them.
11
+ - Known IP-blocking domains are proxied automatically (domain gate).
12
+ - `browser_open` gains a `proxy` param so the agent can retry any other site
13
+ that blocked it through a residential IP.
14
+ - `BrowserConfig.proxies` sets the default mode for every session.
15
+
16
+ Because proxies are fixed at Browserbase-session creation (and Vercel's
17
+ agent-browser hardcodes the create body to `{ projectId }`), we create the
18
+ Browserbase session ourselves with `proxies: true` and connect agent-browser to
19
+ it via its `cdpUrl` path. Switching proxy mode mid-conversation recreates the
20
+ session; cookies/localStorage are persisted and restored across the recreate,
21
+ so login state survives.
22
+
23
+ ## 0.7.0
24
+
25
+ ### Minor Changes
26
+
27
+ - [#184](https://github.com/cesr/poncho-ai/pull/184) [`12ce2be`](https://github.com/cesr/poncho-ai/commit/12ce2be01c9d98b1d9aa634d4d8051c4c0094a44) Thanks [@cesr](https://github.com/cesr)! - Add `browser_download` so the agent can save files from the browser into the
28
+ VFS. The tool fetches a file using the page's logged-in session (so it works
29
+ for files behind a login) and writes the bytes straight to the tenant's VFS via
30
+ `ToolContext.vfs` — never through the model. `url` defaults to the current page,
31
+ or pass a same-origin link's href. The fetch runs inside the page (`evaluate`),
32
+ so it works identically for local and remote/cloud browsers (bytes return over
33
+ CDP). Capped at 25 MB. The harness browser system prompt now documents it under
34
+ a "Saving files" section.
35
+
3
36
  ## 0.6.26
4
37
 
5
38
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -71,6 +71,12 @@ interface BrowserConfig {
71
71
  /** Connect to an existing browser via Chrome DevTools Protocol URL or port.
72
72
  * Mutually exclusive with `provider`. */
73
73
  cdpUrl?: string;
74
+ /** Default residential-proxy mode for Browserbase sessions. Datacenter IPs are
75
+ * hard-blocked (403) by IP-reputation walls (Reddit, LinkedIn, …) before any
76
+ * fingerprint check. Known such domains are always proxied automatically, and
77
+ * `browser_open` can force it per-navigation; set this `true` to proxy every
78
+ * session by default. Billed per GB by Browserbase. Browserbase provider only. */
79
+ proxies?: boolean;
74
80
  }
75
81
 
76
82
  type FrameListener = (frame: BrowserFrame) => void;
@@ -87,6 +93,8 @@ declare class BrowserSession {
87
93
  private _lockQueue;
88
94
  private _locked;
89
95
  private _screencastConversation;
96
+ private launchedProxyMode;
97
+ private proxyEnabled;
90
98
  constructor(sessionId: string, config?: BrowserConfig);
91
99
  get profileDir(): string;
92
100
  private lock;
@@ -131,9 +139,22 @@ declare class BrowserSession {
131
139
  getUrl(conversationId: string): string | undefined;
132
140
  /** Whether the browser has been launched. */
133
141
  get isLaunched(): boolean;
134
- open(conversationId: string, url: string): Promise<{
142
+ open(conversationId: string, url: string, opts?: {
143
+ proxy?: boolean;
144
+ }): Promise<{
135
145
  title?: string;
136
146
  }>;
147
+ /**
148
+ * Reconcile the residential-proxy mode before a navigation. The desired mode
149
+ * is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
150
+ * URL hitting a known IP-blocking domain OR the config default. Since proxies
151
+ * are fixed at Browserbase-session creation, a change tears the live session
152
+ * down so the next ensureManager relaunches proxied. Cookies/localStorage are
153
+ * persisted first and restored on relaunch, so login state survives; open
154
+ * tabs in other conversations of the same session are lost (rare, and only
155
+ * when the mode actually flips). No-op unless the provider is browserbase.
156
+ */
157
+ private ensureProxyMode;
137
158
  private _doOpen;
138
159
  snapshot(conversationId: string): Promise<string>;
139
160
  click(conversationId: string, ref: string): Promise<void>;
@@ -144,6 +165,21 @@ declare class BrowserSession {
144
165
  url: string;
145
166
  title: string;
146
167
  }>;
168
+ /**
169
+ * Fetch a file using the page's own (logged-in) session and return its
170
+ * bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
171
+ * current page. The fetch runs INSIDE the page via `evaluate`, so it carries
172
+ * the site's cookies and works the same whether the browser is local or a
173
+ * remote/cloud provider (the bytes come back over CDP). Because it's a page
174
+ * `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
175
+ * site doesn't allow CORS for will fail — navigate to the file first (so it's
176
+ * same-origin) or pass its direct URL while on that site.
177
+ */
178
+ download(conversationId: string, url?: string): Promise<{
179
+ data: Buffer;
180
+ contentType: string;
181
+ filename: string;
182
+ }>;
147
183
  scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void>;
148
184
  clickText(conversationId: string, text: string, exact?: boolean): Promise<void>;
149
185
  executeJs(conversationId: string, script: string): Promise<unknown>;
package/dist/index.js CHANGED
@@ -186,6 +186,49 @@ async function getBrowserManagerCtor() {
186
186
  return BrowserManagerCtor;
187
187
  }
188
188
  var MAX_TABS = 8;
189
+ var PROXY_DOMAINS = [
190
+ "reddit.com",
191
+ "linkedin.com",
192
+ "instagram.com",
193
+ "facebook.com",
194
+ "x.com",
195
+ "twitter.com",
196
+ "tiktok.com",
197
+ "quora.com",
198
+ "pinterest.com"
199
+ ];
200
+ function shouldProxyFor(url) {
201
+ let host;
202
+ try {
203
+ host = new URL(url).hostname.toLowerCase();
204
+ } catch {
205
+ return false;
206
+ }
207
+ return PROXY_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
208
+ }
209
+ async function createBrowserbaseProxiedSession() {
210
+ const apiKey = process.env.BROWSERBASE_API_KEY;
211
+ const projectId = process.env.BROWSERBASE_PROJECT_ID;
212
+ if (!apiKey || !projectId) {
213
+ throw new Error(
214
+ "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID are required when using browserbase"
215
+ );
216
+ }
217
+ const res = await fetch("https://api.browserbase.com/v1/sessions", {
218
+ method: "POST",
219
+ headers: { "Content-Type": "application/json", "X-BB-API-Key": apiKey },
220
+ body: JSON.stringify({ projectId, proxies: true })
221
+ });
222
+ if (!res.ok) {
223
+ const detail = await res.text().catch(() => "");
224
+ throw new Error(`Failed to create Browserbase session: ${res.status} ${detail}`);
225
+ }
226
+ const session = await res.json();
227
+ if (!session.connectUrl) {
228
+ throw new Error("Browserbase session response missing connectUrl");
229
+ }
230
+ return session.connectUrl;
231
+ }
189
232
  var VALID_SAME_SITE = ["Strict", "Lax", "None"];
190
233
  function sanitizeCookieForCDP(c) {
191
234
  const name = typeof c.name === "string" ? c.name : "";
@@ -244,6 +287,28 @@ var SAME_TAB_INIT_SCRIPT = `
244
287
  } catch {}
245
288
  })();
246
289
  `;
290
+ function sanitizeName(name) {
291
+ const cleaned = name.trim().replace(/[/\\]/g, "_").replace(/\0/g, "");
292
+ return cleaned || "download";
293
+ }
294
+ function filenameFromDownload(disposition, url) {
295
+ const star = /filename\*=(?:UTF-8'')?["']?([^"';]+)/i.exec(disposition);
296
+ if (star?.[1]) {
297
+ try {
298
+ return sanitizeName(decodeURIComponent(star[1]));
299
+ } catch {
300
+ return sanitizeName(star[1]);
301
+ }
302
+ }
303
+ const plain = /filename=["']?([^"';]+)/i.exec(disposition);
304
+ if (plain?.[1]) return sanitizeName(plain[1]);
305
+ try {
306
+ const base = new URL(url).pathname.split("/").filter(Boolean).pop();
307
+ if (base) return sanitizeName(decodeURIComponent(base));
308
+ } catch {
309
+ }
310
+ return "download";
311
+ }
247
312
  var BrowserSession = class {
248
313
  config;
249
314
  sessionId;
@@ -268,6 +333,14 @@ var BrowserSession = class {
268
333
  _locked = false;
269
334
  // Currently screencast conversation (only one at a time due to CDP)
270
335
  _screencastConversation;
336
+ // Residential-proxy mode. `launchedProxyMode` is what the currently-launched
337
+ // remote session was created with; `proxyEnabled` is the mode the NEXT launch
338
+ // should use. They diverge when a navigation asks for a different mode (a
339
+ // hard-domain gate hit, or an explicit `proxy` on browser_open) — proxies are
340
+ // fixed at Browserbase-session creation, so switching means recreating the
341
+ // session (see ensureProxyMode). Only meaningful for the browserbase provider.
342
+ launchedProxyMode = false;
343
+ proxyEnabled = false;
271
344
  constructor(sessionId, config = {}) {
272
345
  this.sessionId = sessionId;
273
346
  this.config = config;
@@ -420,6 +493,9 @@ var BrowserSession = class {
420
493
  if (this.config.cdpUrl) {
421
494
  launchOpts.cdpUrl = this.config.cdpUrl;
422
495
  console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
496
+ } else if (this.config.provider === "browserbase" && this.proxyEnabled) {
497
+ launchOpts.cdpUrl = await createBrowserbaseProxiedSession();
498
+ console.log("[poncho][browser] Using cloud provider: browserbase (residential proxies)");
423
499
  } else if (this.config.provider) {
424
500
  launchOpts.provider = this.config.provider;
425
501
  console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
@@ -448,6 +524,7 @@ var BrowserSession = class {
448
524
  launchOpts.args = baseArgs;
449
525
  }
450
526
  await mgr.launch(launchOpts);
527
+ this.launchedProxyMode = this.config.provider === "browserbase" && this.proxyEnabled;
451
528
  if (this.isRemote) {
452
529
  try {
453
530
  await mgr.setViewport(viewport.width ?? 1280, viewport.height ?? 720);
@@ -587,9 +664,10 @@ var BrowserSession = class {
587
664
  // -----------------------------------------------------------------------
588
665
  // Browser operations (all scoped by conversationId)
589
666
  // -----------------------------------------------------------------------
590
- async open(conversationId, url) {
667
+ async open(conversationId, url, opts) {
591
668
  await this.lock();
592
669
  try {
670
+ await this.ensureProxyMode(url, opts?.proxy);
593
671
  return await this._doOpen(conversationId, url);
594
672
  } catch (err) {
595
673
  const msg = err?.message ?? "";
@@ -616,6 +694,41 @@ var BrowserSession = class {
616
694
  this.unlock();
617
695
  }
618
696
  }
697
+ /**
698
+ * Reconcile the residential-proxy mode before a navigation. The desired mode
699
+ * is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
700
+ * URL hitting a known IP-blocking domain OR the config default. Since proxies
701
+ * are fixed at Browserbase-session creation, a change tears the live session
702
+ * down so the next ensureManager relaunches proxied. Cookies/localStorage are
703
+ * persisted first and restored on relaunch, so login state survives; open
704
+ * tabs in other conversations of the same session are lost (rare, and only
705
+ * when the mode actually flips). No-op unless the provider is browserbase.
706
+ */
707
+ async ensureProxyMode(url, requested) {
708
+ if (this.config.provider !== "browserbase") return;
709
+ const want = requested === true || shouldProxyFor(url) || (this.config.proxies ?? false);
710
+ this.proxyEnabled = want;
711
+ if (!this.manager || want === this.launchedProxyMode) return;
712
+ console.log(`[poncho][browser] Switching proxy mode -> ${want}; recreating session`);
713
+ try {
714
+ await this.persistStorageState();
715
+ } catch {
716
+ }
717
+ try {
718
+ await this.manager.close();
719
+ } catch {
720
+ }
721
+ this.manager = void 0;
722
+ this._contextStealthInstalled = false;
723
+ this._uaOverrideApplied.clear();
724
+ for (const [, t] of this.tabs) {
725
+ if (t.tabIndex >= 0) {
726
+ t.tabIndex = -1;
727
+ t.active = false;
728
+ t.url = void 0;
729
+ }
730
+ }
731
+ }
619
732
  async _doOpen(conversationId, url) {
620
733
  const mgr = await this.ensureManager();
621
734
  const tab = await this.switchToConversation(mgr, conversationId);
@@ -693,6 +806,55 @@ var BrowserSession = class {
693
806
  this.unlock();
694
807
  }
695
808
  }
809
+ /**
810
+ * Fetch a file using the page's own (logged-in) session and return its
811
+ * bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
812
+ * current page. The fetch runs INSIDE the page via `evaluate`, so it carries
813
+ * the site's cookies and works the same whether the browser is local or a
814
+ * remote/cloud provider (the bytes come back over CDP). Because it's a page
815
+ * `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
816
+ * site doesn't allow CORS for will fail — navigate to the file first (so it's
817
+ * same-origin) or pass its direct URL while on that site.
818
+ */
819
+ async download(conversationId, url) {
820
+ await this.lock();
821
+ try {
822
+ const mgr = await this.ensureManager();
823
+ await this.switchToConversation(mgr, conversationId);
824
+ const page = mgr.getPage();
825
+ const target = url && url.trim() ? url.trim() : page.url();
826
+ if (!target || target === "about:blank") {
827
+ throw new Error("no URL to download (open the file's page first, or pass a url)");
828
+ }
829
+ const MAX_BYTES = 25 * 1024 * 1024;
830
+ const expr = `(async () => {
831
+ const res = await fetch(${JSON.stringify(target)}, { credentials: "include" });
832
+ if (!res.ok) throw new Error("HTTP " + res.status + " " + res.statusText);
833
+ const buf = new Uint8Array(await res.arrayBuffer());
834
+ if (buf.length > ${MAX_BYTES}) throw new Error("file too large: " + buf.length + " bytes (max ${MAX_BYTES})");
835
+ let bin = "";
836
+ const CH = 0x8000;
837
+ for (let i = 0; i < buf.length; i += CH) {
838
+ bin += String.fromCharCode.apply(null, buf.subarray(i, i + CH));
839
+ }
840
+ return {
841
+ base64: btoa(bin),
842
+ contentType: res.headers.get("content-type") || "",
843
+ disposition: res.headers.get("content-disposition") || "",
844
+ finalUrl: res.url || ${JSON.stringify(target)},
845
+ };
846
+ })()`;
847
+ const r = await page.evaluate(expr);
848
+ const data = Buffer.from(r.base64, "base64");
849
+ return {
850
+ data,
851
+ contentType: r.contentType,
852
+ filename: filenameFromDownload(r.disposition, r.finalUrl)
853
+ };
854
+ } finally {
855
+ this.unlock();
856
+ }
857
+ }
696
858
  async scroll(conversationId, direction, amount) {
697
859
  await this.lock();
698
860
  try {
@@ -1118,13 +1280,17 @@ function createBrowserTools(getSession) {
1118
1280
  return [
1119
1281
  {
1120
1282
  name: "browser_open",
1121
- description: "Open a URL in a real browser whose live view the user can watch and interact with. Returns the page title. This is a HEAVY, last-resort tool \u2014 prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). When a site needs credentials, navigate to its login page and let the user sign in directly in the live view \u2014 never ask for passwords in chat. To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
1283
+ description: "Open a URL in a real browser whose live view the user can watch and interact with. Returns the page title. This is a HEAVY, last-resort tool \u2014 prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). When a site needs credentials, navigate to its login page and let the user sign in directly in the live view \u2014 never ask for passwords in chat. If a page returns a 403 / 'blocked by network security' / 'access denied' (common on Reddit, LinkedIn, Instagram, and similar), retry with `proxy: true` to route through a residential IP. To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
1122
1284
  inputSchema: {
1123
1285
  type: "object",
1124
1286
  properties: {
1125
1287
  url: {
1126
1288
  type: "string",
1127
1289
  description: "The URL to navigate to (must include protocol, e.g. https://)"
1290
+ },
1291
+ proxy: {
1292
+ type: "boolean",
1293
+ description: "Route this session through a residential proxy instead of a datacenter IP. Known IP-blocking sites (Reddit, LinkedIn, \u2026) use this automatically; set it explicitly to retry a site that blocked you with a 403/'blocked' page. Slower to start (the session is recreated) and costs proxy bandwidth, so only use it when a normal open was blocked."
1128
1294
  }
1129
1295
  },
1130
1296
  required: ["url"]
@@ -1134,7 +1300,8 @@ function createBrowserTools(getSession) {
1134
1300
  const cid = context.conversationId ?? "__default__";
1135
1301
  const url = String(input.url ?? "");
1136
1302
  if (!url) throw new Error("url is required");
1137
- const result = await session.open(cid, url);
1303
+ const proxy = input.proxy === true;
1304
+ const result = await session.open(cid, url, { proxy });
1138
1305
  session.startScreencast(cid).catch((err) => {
1139
1306
  console.error("[poncho][browser] startScreencast failed:", err?.message ?? err);
1140
1307
  });
@@ -1261,6 +1428,47 @@ function createBrowserTools(getSession) {
1261
1428
  return { url: result.url, title: result.title, text: result.text };
1262
1429
  }
1263
1430
  },
1431
+ {
1432
+ name: "browser_download",
1433
+ description: "Download a file from the browser and save it into the user's virtual filesystem (VFS). Fetches the file using the browser's logged-in session, so it works for files behind a login \u2014 use it to keep a PDF, CSV, image, or other file the page offers. It fetches `url` (or the current page if you omit it), so for a download link on the page, grab its href from a snapshot first; for a file that opens in the browser, navigate to it and call this with no url. The fetch runs in the page, so the url should be same-origin with the current page (navigate to the file's site first if needed). Returns the saved VFS path and byte size \u2014 the bytes go straight to the VFS, not through the chat.",
1434
+ inputSchema: {
1435
+ type: "object",
1436
+ properties: {
1437
+ path: {
1438
+ type: "string",
1439
+ description: "Destination in the VFS. Include a filename (e.g. /downloads/report.pdf); parent folders are created as needed. End with '/' (e.g. /downloads/) to keep the file's own name."
1440
+ },
1441
+ url: {
1442
+ type: "string",
1443
+ description: "URL of the file to download. Optional \u2014 defaults to the current page's URL."
1444
+ }
1445
+ },
1446
+ required: ["path"]
1447
+ },
1448
+ handler: async (input, context) => {
1449
+ const session = getSession();
1450
+ const vfs = context.vfs;
1451
+ if (!vfs) throw new Error("VFS is not available in this environment");
1452
+ const dest0 = String(input.path ?? "").trim();
1453
+ if (!dest0) throw new Error("path is required");
1454
+ const url = input.url != null ? String(input.url) : void 0;
1455
+ const { data, contentType, filename } = await session.download(
1456
+ context.conversationId ?? "__default__",
1457
+ url
1458
+ );
1459
+ let dest = dest0.startsWith("/") ? dest0 : `/${dest0}`;
1460
+ if (dest.endsWith("/")) dest = `${dest}${filename}`;
1461
+ const slash = dest.lastIndexOf("/");
1462
+ if (slash > 0) {
1463
+ try {
1464
+ await vfs.mkdir(dest.slice(0, slash), { recursive: true });
1465
+ } catch {
1466
+ }
1467
+ }
1468
+ await vfs.writeFile(dest, new Uint8Array(data), contentType || void 0);
1469
+ return { path: dest, bytes: data.length, ...contentType ? { contentType } : {} };
1470
+ }
1471
+ },
1264
1472
  {
1265
1473
  name: "browser_screenshot",
1266
1474
  description: "Take a screenshot of the current page. Returns the image so you can see exactly what the page looks like. Use this when you need to see visual layout, verify actions, or read content that isn't in the accessibility tree.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/browser",
3
- "version": "0.6.26",
3
+ "version": "0.7.1",
4
4
  "description": "Browser automation for Poncho agents, powered by agent-browser",
5
5
  "repository": {
6
6
  "type": "git",
package/src/session.ts CHANGED
@@ -62,6 +62,64 @@ async function getBrowserManagerCtor(): Promise<new () => BrowserManagerInstance
62
62
 
63
63
  const MAX_TABS = 8;
64
64
 
65
+ /**
66
+ * Sites that hard-block datacenter IPs (a 403 / "blocked by network security"
67
+ * before any fingerprint check). Navigating to one auto-enables residential
68
+ * proxies. Matched on the registrable-ish suffix so subdomains
69
+ * (old.reddit.com, www.linkedin.com) are covered. The agent can also force
70
+ * proxies on for anything else via `browser_open`'s `proxy` param.
71
+ */
72
+ const PROXY_DOMAINS = [
73
+ "reddit.com",
74
+ "linkedin.com",
75
+ "instagram.com",
76
+ "facebook.com",
77
+ "x.com",
78
+ "twitter.com",
79
+ "tiktok.com",
80
+ "quora.com",
81
+ "pinterest.com",
82
+ ];
83
+
84
+ /** Whether a URL's host is (a subdomain of) a known IP-blocking domain. */
85
+ function shouldProxyFor(url: string): boolean {
86
+ let host: string;
87
+ try { host = new URL(url).hostname.toLowerCase(); }
88
+ catch { return false; }
89
+ return PROXY_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
90
+ }
91
+
92
+ /**
93
+ * Create a Browserbase session with residential proxies enabled and return its
94
+ * CDP `connectUrl`. Used instead of agent-browser's built-in browserbase path,
95
+ * which hardcodes the create body to `{ projectId }` and so can't turn proxies
96
+ * on. Reads the same `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` env vars
97
+ * agent-browser does.
98
+ */
99
+ async function createBrowserbaseProxiedSession(): Promise<string> {
100
+ const apiKey = process.env.BROWSERBASE_API_KEY;
101
+ const projectId = process.env.BROWSERBASE_PROJECT_ID;
102
+ if (!apiKey || !projectId) {
103
+ throw new Error(
104
+ "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID are required when using browserbase",
105
+ );
106
+ }
107
+ const res = await fetch("https://api.browserbase.com/v1/sessions", {
108
+ method: "POST",
109
+ headers: { "Content-Type": "application/json", "X-BB-API-Key": apiKey },
110
+ body: JSON.stringify({ projectId, proxies: true }),
111
+ });
112
+ if (!res.ok) {
113
+ const detail = await res.text().catch(() => "");
114
+ throw new Error(`Failed to create Browserbase session: ${res.status} ${detail}`);
115
+ }
116
+ const session = (await res.json()) as { connectUrl?: string };
117
+ if (!session.connectUrl) {
118
+ throw new Error("Browserbase session response missing connectUrl");
119
+ }
120
+ return session.connectUrl;
121
+ }
122
+
65
123
  const VALID_SAME_SITE = ["Strict", "Lax", "None"];
66
124
 
67
125
  /**
@@ -146,6 +204,29 @@ interface ConversationTab {
146
204
  lastUsed: number;
147
205
  }
148
206
 
207
+ /** Strip path separators / nulls so a derived name can't escape its folder. */
208
+ function sanitizeName(name: string): string {
209
+ const cleaned = name.trim().replace(/[/\\]/g, "_").replace(/\0/g, "");
210
+ return cleaned || "download";
211
+ }
212
+
213
+ /** Derive a filename from a Content-Disposition header, falling back to the
214
+ * URL's last path segment, then a generic "download". */
215
+ function filenameFromDownload(disposition: string, url: string): string {
216
+ const star = /filename\*=(?:UTF-8'')?["']?([^"';]+)/i.exec(disposition);
217
+ if (star?.[1]) {
218
+ try { return sanitizeName(decodeURIComponent(star[1])); }
219
+ catch { return sanitizeName(star[1]); }
220
+ }
221
+ const plain = /filename=["']?([^"';]+)/i.exec(disposition);
222
+ if (plain?.[1]) return sanitizeName(plain[1]);
223
+ try {
224
+ const base = new URL(url).pathname.split("/").filter(Boolean).pop();
225
+ if (base) return sanitizeName(decodeURIComponent(base));
226
+ } catch { /* not a parseable URL */ }
227
+ return "download";
228
+ }
229
+
149
230
  export class BrowserSession {
150
231
  private readonly config: BrowserConfig;
151
232
  private readonly sessionId: string;
@@ -177,6 +258,15 @@ export class BrowserSession {
177
258
  // Currently screencast conversation (only one at a time due to CDP)
178
259
  private _screencastConversation: string | undefined;
179
260
 
261
+ // Residential-proxy mode. `launchedProxyMode` is what the currently-launched
262
+ // remote session was created with; `proxyEnabled` is the mode the NEXT launch
263
+ // should use. They diverge when a navigation asks for a different mode (a
264
+ // hard-domain gate hit, or an explicit `proxy` on browser_open) — proxies are
265
+ // fixed at Browserbase-session creation, so switching means recreating the
266
+ // session (see ensureProxyMode). Only meaningful for the browserbase provider.
267
+ private launchedProxyMode = false;
268
+ private proxyEnabled = false;
269
+
180
270
  constructor(sessionId: string, config: BrowserConfig = {}) {
181
271
  this.sessionId = sessionId;
182
272
  this.config = config;
@@ -356,6 +446,16 @@ export class BrowserSession {
356
446
  if (this.config.cdpUrl) {
357
447
  launchOpts.cdpUrl = this.config.cdpUrl;
358
448
  console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
449
+ } else if (this.config.provider === "browserbase" && this.proxyEnabled) {
450
+ // agent-browser's browserbase path creates the session with only
451
+ // { projectId } — no proxy option — so it always lands on a datacenter IP
452
+ // that sites like Reddit block with a 403 before any fingerprint check.
453
+ // Create the session ourselves with residential proxies enabled and hand
454
+ // agent-browser the connectUrl via its cdpUrl path, which bypasses
455
+ // connectToBrowserbase. Stealth, cookie restore, and the screencast are
456
+ // applied below on the connected context, unchanged.
457
+ launchOpts.cdpUrl = await createBrowserbaseProxiedSession();
458
+ console.log("[poncho][browser] Using cloud provider: browserbase (residential proxies)");
359
459
  } else if (this.config.provider) {
360
460
  launchOpts.provider = this.config.provider;
361
461
  console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
@@ -391,6 +491,9 @@ export class BrowserSession {
391
491
  }
392
492
 
393
493
  await mgr.launch(launchOpts as Parameters<BrowserManagerInstance["launch"]>[0]);
494
+ // Record the proxy mode this session was actually launched with, so
495
+ // ensureProxyMode knows whether a later navigation needs a recreate.
496
+ this.launchedProxyMode = this.config.provider === "browserbase" && this.proxyEnabled;
394
497
 
395
498
  // Remote browsers (cloud provider / cdpUrl) ignore launchOpts.viewport —
396
499
  // that's only applied when launching a local context — so the page renders
@@ -542,9 +645,14 @@ export class BrowserSession {
542
645
  // Browser operations (all scoped by conversationId)
543
646
  // -----------------------------------------------------------------------
544
647
 
545
- async open(conversationId: string, url: string): Promise<{ title?: string }> {
648
+ async open(
649
+ conversationId: string,
650
+ url: string,
651
+ opts?: { proxy?: boolean },
652
+ ): Promise<{ title?: string }> {
546
653
  await this.lock();
547
654
  try {
655
+ await this.ensureProxyMode(url, opts?.proxy);
548
656
  return await this._doOpen(conversationId, url);
549
657
  } catch (err: unknown) {
550
658
  const msg = (err as Error)?.message ?? "";
@@ -565,6 +673,33 @@ export class BrowserSession {
565
673
  }
566
674
  }
567
675
 
676
+ /**
677
+ * Reconcile the residential-proxy mode before a navigation. The desired mode
678
+ * is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
679
+ * URL hitting a known IP-blocking domain OR the config default. Since proxies
680
+ * are fixed at Browserbase-session creation, a change tears the live session
681
+ * down so the next ensureManager relaunches proxied. Cookies/localStorage are
682
+ * persisted first and restored on relaunch, so login state survives; open
683
+ * tabs in other conversations of the same session are lost (rare, and only
684
+ * when the mode actually flips). No-op unless the provider is browserbase.
685
+ */
686
+ private async ensureProxyMode(url: string, requested?: boolean): Promise<void> {
687
+ if (this.config.provider !== "browserbase") return;
688
+ const want =
689
+ requested === true || shouldProxyFor(url) || (this.config.proxies ?? false);
690
+ this.proxyEnabled = want;
691
+ if (!this.manager || want === this.launchedProxyMode) return;
692
+ console.log(`[poncho][browser] Switching proxy mode -> ${want}; recreating session`);
693
+ try { await this.persistStorageState(); } catch { /* best-effort */ }
694
+ try { await this.manager.close(); } catch { /* */ }
695
+ this.manager = undefined;
696
+ this._contextStealthInstalled = false;
697
+ this._uaOverrideApplied.clear();
698
+ for (const [, t] of this.tabs) {
699
+ if (t.tabIndex >= 0) { t.tabIndex = -1; t.active = false; t.url = undefined; }
700
+ }
701
+ }
702
+
568
703
  private async _doOpen(conversationId: string, url: string): Promise<{ title?: string }> {
569
704
  const mgr = await this.ensureManager();
570
705
  const tab = await this.switchToConversation(mgr, conversationId);
@@ -655,6 +790,66 @@ export class BrowserSession {
655
790
  }
656
791
  }
657
792
 
/**
 * Fetch a file using the page's own (logged-in) session and return its
 * bytes, so the host can persist it (e.g. to a VFS).
 *
 * The fetch runs INSIDE the page via `evaluate` with `credentials: "include"`,
 * so it carries the site's cookies; the bytes are base64-encoded in the page
 * and decoded here. Because it is a page `fetch`, same-origin and
 * CORS-permissive URLs work; a cross-origin URL the site doesn't allow CORS
 * for will fail — navigate to the file first (so it's same-origin) or pass
 * its direct URL while on that site.
 *
 * Files are capped at 25 MiB. When the server advertises a `Content-Length`
 * above the cap the download is rejected before the body is read; otherwise
 * the cap is enforced once the body has been buffered.
 *
 * @param conversationId - Conversation whose tab the fetch runs in.
 * @param url - File URL; blank/omitted means the current page's URL.
 * @returns The file bytes, the response `content-type` (may be empty), and a
 *   filename derived from `content-disposition` / the final URL.
 * @throws Error when there is no URL to fetch, the response is not OK, the
 *   file exceeds the size cap, or the page returns no file data.
 */
async download(
  conversationId: string,
  url?: string,
): Promise<{ data: Buffer; contentType: string; filename: string }> {
  await this.lock();
  try {
    const mgr = await this.ensureManager();
    await this.switchToConversation(mgr, conversationId);
    const page = mgr.getPage();
    const target = url?.trim() || page.url();
    if (!target || target === "about:blank") {
      throw new Error("no URL to download (open the file's page first, or pass a url)");
    }
    const MAX_BYTES = 25 * 1024 * 1024;
    // Build the in-page fetch. JSON.stringify safely escapes the URL into the
    // evaluated source. Base64 in-page so the bytes survive the JSON channel.
    const expr = `(async () => {
      const res = await fetch(${JSON.stringify(target)}, { credentials: "include" });
      if (!res.ok) throw new Error("HTTP " + res.status + " " + res.statusText);
      // Fail fast on an advertised oversize instead of buffering it all first.
      // A missing/unparseable header yields 0/NaN and falls through to the
      // post-buffer check below.
      const declared = Number(res.headers.get("content-length"));
      if (declared > ${MAX_BYTES}) {
        if (res.body) res.body.cancel().catch(() => {});
        throw new Error("file too large: " + declared + " bytes (max ${MAX_BYTES})");
      }
      const buf = new Uint8Array(await res.arrayBuffer());
      if (buf.length > ${MAX_BYTES}) throw new Error("file too large: " + buf.length + " bytes (max ${MAX_BYTES})");
      // Chunked so String.fromCharCode.apply never exceeds the argument limit.
      let bin = "";
      const CH = 0x8000;
      for (let i = 0; i < buf.length; i += CH) {
        bin += String.fromCharCode.apply(null, buf.subarray(i, i + CH));
      }
      return {
        base64: btoa(bin),
        contentType: res.headers.get("content-type") || "",
        disposition: res.headers.get("content-disposition") || "",
        finalUrl: res.url || ${JSON.stringify(target)},
      };
    })()`;
    const r = (await page.evaluate(expr)) as
      | {
          base64: string;
          contentType: string;
          disposition: string;
          finalUrl: string;
        }
      | undefined;
    // Guard the untyped evaluate result so a malformed value surfaces as a
    // clear error rather than an opaque TypeError from Buffer.from.
    if (!r || typeof r.base64 !== "string") {
      throw new Error("download failed: the page returned no file data");
    }
    const data = Buffer.from(r.base64, "base64");
    return {
      data,
      contentType: r.contentType,
      filename: filenameFromDownload(r.disposition, r.finalUrl),
    };
  } finally {
    this.unlock();
  }
}
852
+
658
853
  async scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void> {
659
854
  await this.lock();
660
855
  try {
package/src/tools.ts CHANGED
@@ -14,6 +14,7 @@ export function createBrowserTools(
14
14
  "This is a HEAVY, last-resort tool — prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. " +
15
15
  "Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). " +
16
16
  "When a site needs credentials, navigate to its login page and let the user sign in directly in the live view — never ask for passwords in chat. " +
17
+ "If a page returns a 403 / 'blocked by network security' / 'access denied' (common on Reddit, LinkedIn, Instagram, and similar), retry with `proxy: true` to route through a residential IP. " +
17
18
  "To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
18
19
  inputSchema: {
19
20
  type: "object",
@@ -22,6 +23,11 @@ export function createBrowserTools(
22
23
  type: "string",
23
24
  description: "The URL to navigate to (must include protocol, e.g. https://)",
24
25
  },
26
+ proxy: {
27
+ type: "boolean",
28
+ description:
29
+ "Route this session through a residential proxy instead of a datacenter IP. Known IP-blocking sites (Reddit, LinkedIn, …) use this automatically; set it explicitly to retry a site that blocked you with a 403/'blocked' page. Slower to start (the session is recreated) and costs proxy bandwidth, so only use it when a normal open was blocked.",
30
+ },
25
31
  },
26
32
  required: ["url"],
27
33
  },
@@ -30,7 +36,8 @@ export function createBrowserTools(
30
36
  const cid = context.conversationId ?? "__default__";
31
37
  const url = String(input.url ?? "");
32
38
  if (!url) throw new Error("url is required");
33
- const result = await session.open(cid, url);
39
+ const proxy = input.proxy === true;
40
+ const result = await session.open(cid, url, { proxy });
34
41
  session.startScreencast(cid).catch((err) => {
35
42
  console.error("[poncho][browser] startScreencast failed:", err?.message ?? err);
36
43
  });
@@ -176,6 +183,55 @@ export function createBrowserTools(
176
183
  return { url: result.url, title: result.title, text: result.text };
177
184
  },
178
185
  },
{
  name: "browser_download",
  description:
    "Download a file from the browser and save it into the user's virtual filesystem (VFS). " +
    "Fetches the file using the browser's logged-in session, so it works for files behind a login — " +
    "use it to keep a PDF, CSV, image, or other file the page offers. " +
    "It fetches `url` (or the current page if you omit it), so for a download link on the page, grab its href from a snapshot first; " +
    "for a file that opens in the browser, navigate to it and call this with no url. " +
    "The fetch runs in the page, so the url should be same-origin with the current page (navigate to the file's site first if needed). " +
    "Returns the saved VFS path and byte size — the bytes go straight to the VFS, not through the chat.",
  inputSchema: {
    type: "object",
    properties: {
      path: {
        type: "string",
        description:
          "Destination in the VFS. Include a filename (e.g. /downloads/report.pdf); parent folders are created as needed. " +
          "End with '/' (e.g. /downloads/) to keep the file's own name.",
      },
      url: {
        type: "string",
        description:
          "URL of the file to download. Optional — defaults to the current page's URL.",
      },
    },
    required: ["path"],
  },
  /**
   * Downloads via the browser session and writes the bytes to the VFS.
   * Resolves to the final VFS path, the byte count, and the content type
   * when the response carried one.
   */
  handler: async (input: BrowserToolInput, context: ToolContext) => {
    const session = getSession();
    const vfs = context.vfs;
    if (!vfs) {
      throw new Error("VFS is not available in this environment");
    }

    const requestedPath = String(input.path ?? "").trim();
    if (!requestedPath) {
      throw new Error("path is required");
    }

    let sourceUrl: string | undefined;
    if (input.url != null) {
      sourceUrl = String(input.url);
    }

    const conversation = context.conversationId ?? "__default__";
    const { data, contentType, filename } = await session.download(conversation, sourceUrl);

    // Normalise to an absolute VFS path; a trailing slash means "keep the
    // downloaded file's own name inside this folder".
    const absolute = requestedPath.startsWith("/") ? requestedPath : `/${requestedPath}`;
    const dest = absolute.endsWith("/") ? `${absolute}${filename}` : absolute;

    // Create the parent folder (skipped when the file sits directly in "/").
    const parentEnd = dest.lastIndexOf("/");
    if (parentEnd > 0) {
      const parentDir = dest.slice(0, parentEnd);
      try {
        await vfs.mkdir(parentDir, { recursive: true });
      } catch {
        /* exists */
      }
    }

    await vfs.writeFile(dest, new Uint8Array(data), contentType || undefined);
    return { path: dest, bytes: data.length, ...(contentType ? { contentType } : {}) };
  },
},
179
235
  {
180
236
  name: "browser_screenshot",
181
237
  description:
package/src/types.ts CHANGED
@@ -77,4 +77,10 @@ export interface BrowserConfig {
77
77
  /** Connect to an existing browser via Chrome DevTools Protocol URL or port.
78
78
  * Mutually exclusive with `provider`. */
79
79
  cdpUrl?: string;
80
+ /** Default residential-proxy mode for Browserbase sessions. Datacenter IPs are
81
+ * hard-blocked (403) by IP-reputation walls (Reddit, LinkedIn, …) before any
82
+ * fingerprint check. Known such domains are always proxied automatically, and
83
+ * `browser_open` can force it per-navigation; set this `true` to proxy every
84
+ * session by default. Billed per GB by Browserbase. Browserbase provider only. */
85
+ proxies?: boolean;
80
86
  }