@poncho-ai/browser 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +20 -0
- package/dist/index.d.ts +22 -1
- package/dist/index.js +99 -3
- package/package.json +1 -1
- package/src/session.ts +113 -1
- package/src/tools.ts +8 -1
- package/src/types.ts +6 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/browser@0.7.0 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
|
|
2
|
+
> @poncho-ai/browser@0.7.1 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
|
|
3
3
|
> tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
[34mCLI[39m tsup v8.5.1
|
|
8
8
|
[34mCLI[39m Target: es2022
|
|
9
9
|
[34mESM[39m Build start
|
|
10
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
11
|
-
[32mESM[39m ⚡️ Build success in
|
|
10
|
+
[32mESM[39m [1mdist/index.js [22m[32m57.26 KB[39m
|
|
11
|
+
[32mESM[39m ⚡️ Build success in 77ms
|
|
12
12
|
[34mDTS[39m Build start
|
|
13
|
-
[32mDTS[39m ⚡️ Build success in
|
|
14
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[
|
|
13
|
+
[32mDTS[39m ⚡️ Build success in 5467ms
|
|
14
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m15.75 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @poncho-ai/browser
|
|
2
2
|
|
|
3
|
+
## 0.7.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [#188](https://github.com/cesr/poncho-ai/pull/188) [`97772cc`](https://github.com/cesr/poncho-ai/commit/97772ccf2c07ec3a3f3350ef3a65596fba91a154) Thanks [@cesr](https://github.com/cesr)! - Add residential-proxy support for Browserbase sessions so IP-reputation walls
|
|
8
|
+
(Reddit, LinkedIn, Instagram, …) stop returning 403 "blocked by network
|
|
9
|
+
security". Datacenter IPs are blocked before any fingerprint check, so stealth
|
|
10
|
+
alone can't get past them.
|
|
11
|
+
- Known IP-blocking domains are proxied automatically (domain gate).
|
|
12
|
+
- `browser_open` gains a `proxy` param so the agent can retry any other site
|
|
13
|
+
that blocked it through a residential IP.
|
|
14
|
+
- `BrowserConfig.proxies` sets the default mode for every session.
|
|
15
|
+
|
|
16
|
+
Because proxies are fixed at Browserbase-session creation (and Vercel's
|
|
17
|
+
agent-browser hardcodes the create body to `{ projectId }`), we create the
|
|
18
|
+
Browserbase session ourselves with `proxies: true` and connect agent-browser to
|
|
19
|
+
it via its `cdpUrl` path. Switching proxy mode mid-conversation recreates the
|
|
20
|
+
session; cookies/localStorage are persisted and restored across the recreate,
|
|
21
|
+
so login state survives.
|
|
22
|
+
|
|
3
23
|
## 0.7.0
|
|
4
24
|
|
|
5
25
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -71,6 +71,12 @@ interface BrowserConfig {
|
|
|
71
71
|
/** Connect to an existing browser via Chrome DevTools Protocol URL or port.
|
|
72
72
|
* Mutually exclusive with `provider`. */
|
|
73
73
|
cdpUrl?: string;
|
|
74
|
+
/** Default residential-proxy mode for Browserbase sessions. Datacenter IPs are
|
|
75
|
+
* hard-blocked (403) by IP-reputation walls (Reddit, LinkedIn, …) before any
|
|
76
|
+
* fingerprint check. Known such domains are always proxied automatically, and
|
|
77
|
+
* `browser_open` can force it per-navigation; set this `true` to proxy every
|
|
78
|
+
* session by default. Billed per GB by Browserbase. Browserbase provider only. */
|
|
79
|
+
proxies?: boolean;
|
|
74
80
|
}
|
|
75
81
|
|
|
76
82
|
type FrameListener = (frame: BrowserFrame) => void;
|
|
@@ -87,6 +93,8 @@ declare class BrowserSession {
|
|
|
87
93
|
private _lockQueue;
|
|
88
94
|
private _locked;
|
|
89
95
|
private _screencastConversation;
|
|
96
|
+
private launchedProxyMode;
|
|
97
|
+
private proxyEnabled;
|
|
90
98
|
constructor(sessionId: string, config?: BrowserConfig);
|
|
91
99
|
get profileDir(): string;
|
|
92
100
|
private lock;
|
|
@@ -131,9 +139,22 @@ declare class BrowserSession {
|
|
|
131
139
|
getUrl(conversationId: string): string | undefined;
|
|
132
140
|
/** Whether the browser has been launched. */
|
|
133
141
|
get isLaunched(): boolean;
|
|
134
|
-
open(conversationId: string, url: string): Promise<{
|
|
142
|
+
open(conversationId: string, url: string, opts?: {
|
|
143
|
+
proxy?: boolean;
|
|
144
|
+
}): Promise<{
|
|
135
145
|
title?: string;
|
|
136
146
|
}>;
|
|
147
|
+
/**
|
|
148
|
+
* Reconcile the residential-proxy mode before a navigation. The desired mode
|
|
149
|
+
* is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
|
|
150
|
+
* URL hitting a known IP-blocking domain OR the config default. Since proxies
|
|
151
|
+
* are fixed at Browserbase-session creation, a change tears the live session
|
|
152
|
+
* down so the next ensureManager relaunches proxied. Cookies/localStorage are
|
|
153
|
+
* persisted first and restored on relaunch, so login state survives; open
|
|
154
|
+
* tabs in other conversations of the same session are lost (rare, and only
|
|
155
|
+
* when the mode actually flips). No-op unless the provider is browserbase.
|
|
156
|
+
*/
|
|
157
|
+
private ensureProxyMode;
|
|
137
158
|
private _doOpen;
|
|
138
159
|
snapshot(conversationId: string): Promise<string>;
|
|
139
160
|
click(conversationId: string, ref: string): Promise<void>;
|
package/dist/index.js
CHANGED
|
@@ -186,6 +186,49 @@ async function getBrowserManagerCtor() {
|
|
|
186
186
|
return BrowserManagerCtor;
|
|
187
187
|
}
|
|
188
188
|
var MAX_TABS = 8;
|
|
189
|
+
var PROXY_DOMAINS = [
|
|
190
|
+
"reddit.com",
|
|
191
|
+
"linkedin.com",
|
|
192
|
+
"instagram.com",
|
|
193
|
+
"facebook.com",
|
|
194
|
+
"x.com",
|
|
195
|
+
"twitter.com",
|
|
196
|
+
"tiktok.com",
|
|
197
|
+
"quora.com",
|
|
198
|
+
"pinterest.com"
|
|
199
|
+
];
|
|
200
|
+
function shouldProxyFor(url) {
|
|
201
|
+
let host;
|
|
202
|
+
try {
|
|
203
|
+
host = new URL(url).hostname.toLowerCase();
|
|
204
|
+
} catch {
|
|
205
|
+
return false;
|
|
206
|
+
}
|
|
207
|
+
return PROXY_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
|
|
208
|
+
}
|
|
209
|
+
async function createBrowserbaseProxiedSession() {
|
|
210
|
+
const apiKey = process.env.BROWSERBASE_API_KEY;
|
|
211
|
+
const projectId = process.env.BROWSERBASE_PROJECT_ID;
|
|
212
|
+
if (!apiKey || !projectId) {
|
|
213
|
+
throw new Error(
|
|
214
|
+
"BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID are required when using browserbase"
|
|
215
|
+
);
|
|
216
|
+
}
|
|
217
|
+
const res = await fetch("https://api.browserbase.com/v1/sessions", {
|
|
218
|
+
method: "POST",
|
|
219
|
+
headers: { "Content-Type": "application/json", "X-BB-API-Key": apiKey },
|
|
220
|
+
body: JSON.stringify({ projectId, proxies: true })
|
|
221
|
+
});
|
|
222
|
+
if (!res.ok) {
|
|
223
|
+
const detail = await res.text().catch(() => "");
|
|
224
|
+
throw new Error(`Failed to create Browserbase session: ${res.status} ${detail}`);
|
|
225
|
+
}
|
|
226
|
+
const session = await res.json();
|
|
227
|
+
if (!session.connectUrl) {
|
|
228
|
+
throw new Error("Browserbase session response missing connectUrl");
|
|
229
|
+
}
|
|
230
|
+
return session.connectUrl;
|
|
231
|
+
}
|
|
189
232
|
var VALID_SAME_SITE = ["Strict", "Lax", "None"];
|
|
190
233
|
function sanitizeCookieForCDP(c) {
|
|
191
234
|
const name = typeof c.name === "string" ? c.name : "";
|
|
@@ -290,6 +333,14 @@ var BrowserSession = class {
|
|
|
290
333
|
_locked = false;
|
|
291
334
|
// Currently screencast conversation (only one at a time due to CDP)
|
|
292
335
|
_screencastConversation;
|
|
336
|
+
// Residential-proxy mode. `launchedProxyMode` is what the currently-launched
|
|
337
|
+
// remote session was created with; `proxyEnabled` is the mode the NEXT launch
|
|
338
|
+
// should use. They diverge when a navigation asks for a different mode (a
|
|
339
|
+
// hard-domain gate hit, or an explicit `proxy` on browser_open) — proxies are
|
|
340
|
+
// fixed at Browserbase-session creation, so switching means recreating the
|
|
341
|
+
// session (see ensureProxyMode). Only meaningful for the browserbase provider.
|
|
342
|
+
launchedProxyMode = false;
|
|
343
|
+
proxyEnabled = false;
|
|
293
344
|
constructor(sessionId, config = {}) {
|
|
294
345
|
this.sessionId = sessionId;
|
|
295
346
|
this.config = config;
|
|
@@ -442,6 +493,9 @@ var BrowserSession = class {
|
|
|
442
493
|
if (this.config.cdpUrl) {
|
|
443
494
|
launchOpts.cdpUrl = this.config.cdpUrl;
|
|
444
495
|
console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
|
|
496
|
+
} else if (this.config.provider === "browserbase" && this.proxyEnabled) {
|
|
497
|
+
launchOpts.cdpUrl = await createBrowserbaseProxiedSession();
|
|
498
|
+
console.log("[poncho][browser] Using cloud provider: browserbase (residential proxies)");
|
|
445
499
|
} else if (this.config.provider) {
|
|
446
500
|
launchOpts.provider = this.config.provider;
|
|
447
501
|
console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
|
|
@@ -470,6 +524,7 @@ var BrowserSession = class {
|
|
|
470
524
|
launchOpts.args = baseArgs;
|
|
471
525
|
}
|
|
472
526
|
await mgr.launch(launchOpts);
|
|
527
|
+
this.launchedProxyMode = this.config.provider === "browserbase" && this.proxyEnabled;
|
|
473
528
|
if (this.isRemote) {
|
|
474
529
|
try {
|
|
475
530
|
await mgr.setViewport(viewport.width ?? 1280, viewport.height ?? 720);
|
|
@@ -609,9 +664,10 @@ var BrowserSession = class {
|
|
|
609
664
|
// -----------------------------------------------------------------------
|
|
610
665
|
// Browser operations (all scoped by conversationId)
|
|
611
666
|
// -----------------------------------------------------------------------
|
|
612
|
-
async open(conversationId, url) {
|
|
667
|
+
async open(conversationId, url, opts) {
|
|
613
668
|
await this.lock();
|
|
614
669
|
try {
|
|
670
|
+
await this.ensureProxyMode(url, opts?.proxy);
|
|
615
671
|
return await this._doOpen(conversationId, url);
|
|
616
672
|
} catch (err) {
|
|
617
673
|
const msg = err?.message ?? "";
|
|
@@ -638,6 +694,41 @@ var BrowserSession = class {
|
|
|
638
694
|
this.unlock();
|
|
639
695
|
}
|
|
640
696
|
}
|
|
697
|
+
/**
|
|
698
|
+
* Reconcile the residential-proxy mode before a navigation. The desired mode
|
|
699
|
+
* is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
|
|
700
|
+
* URL hitting a known IP-blocking domain OR the config default. Since proxies
|
|
701
|
+
* are fixed at Browserbase-session creation, a change tears the live session
|
|
702
|
+
* down so the next ensureManager relaunches proxied. Cookies/localStorage are
|
|
703
|
+
* persisted first and restored on relaunch, so login state survives; open
|
|
704
|
+
* tabs in other conversations of the same session are lost (rare, and only
|
|
705
|
+
* when the mode actually flips). No-op unless the provider is browserbase.
|
|
706
|
+
*/
|
|
707
|
+
async ensureProxyMode(url, requested) {
|
|
708
|
+
if (this.config.provider !== "browserbase") return;
|
|
709
|
+
const want = requested === true || shouldProxyFor(url) || (this.config.proxies ?? false);
|
|
710
|
+
this.proxyEnabled = want;
|
|
711
|
+
if (!this.manager || want === this.launchedProxyMode) return;
|
|
712
|
+
console.log(`[poncho][browser] Switching proxy mode -> ${want}; recreating session`);
|
|
713
|
+
try {
|
|
714
|
+
await this.persistStorageState();
|
|
715
|
+
} catch {
|
|
716
|
+
}
|
|
717
|
+
try {
|
|
718
|
+
await this.manager.close();
|
|
719
|
+
} catch {
|
|
720
|
+
}
|
|
721
|
+
this.manager = void 0;
|
|
722
|
+
this._contextStealthInstalled = false;
|
|
723
|
+
this._uaOverrideApplied.clear();
|
|
724
|
+
for (const [, t] of this.tabs) {
|
|
725
|
+
if (t.tabIndex >= 0) {
|
|
726
|
+
t.tabIndex = -1;
|
|
727
|
+
t.active = false;
|
|
728
|
+
t.url = void 0;
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
}
|
|
641
732
|
async _doOpen(conversationId, url) {
|
|
642
733
|
const mgr = await this.ensureManager();
|
|
643
734
|
const tab = await this.switchToConversation(mgr, conversationId);
|
|
@@ -1189,13 +1280,17 @@ function createBrowserTools(getSession) {
|
|
|
1189
1280
|
return [
|
|
1190
1281
|
{
|
|
1191
1282
|
name: "browser_open",
|
|
1192
|
-
description: "Open a URL in a real browser whose live view the user can watch and interact with. Returns the page title. This is a HEAVY, last-resort tool \u2014 prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). When a site needs credentials, navigate to its login page and let the user sign in directly in the live view \u2014 never ask for passwords in chat. To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
|
|
1283
|
+
description: "Open a URL in a real browser whose live view the user can watch and interact with. Returns the page title. This is a HEAVY, last-resort tool \u2014 prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). When a site needs credentials, navigate to its login page and let the user sign in directly in the live view \u2014 never ask for passwords in chat. If a page returns a 403 / 'blocked by network security' / 'access denied' (common on Reddit, LinkedIn, Instagram, and similar), retry with `proxy: true` to route through a residential IP. To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
|
|
1193
1284
|
inputSchema: {
|
|
1194
1285
|
type: "object",
|
|
1195
1286
|
properties: {
|
|
1196
1287
|
url: {
|
|
1197
1288
|
type: "string",
|
|
1198
1289
|
description: "The URL to navigate to (must include protocol, e.g. https://)"
|
|
1290
|
+
},
|
|
1291
|
+
proxy: {
|
|
1292
|
+
type: "boolean",
|
|
1293
|
+
description: "Route this session through a residential proxy instead of a datacenter IP. Known IP-blocking sites (Reddit, LinkedIn, \u2026) use this automatically; set it explicitly to retry a site that blocked you with a 403/'blocked' page. Slower to start (the session is recreated) and costs proxy bandwidth, so only use it when a normal open was blocked."
|
|
1199
1294
|
}
|
|
1200
1295
|
},
|
|
1201
1296
|
required: ["url"]
|
|
@@ -1205,7 +1300,8 @@ function createBrowserTools(getSession) {
|
|
|
1205
1300
|
const cid = context.conversationId ?? "__default__";
|
|
1206
1301
|
const url = String(input.url ?? "");
|
|
1207
1302
|
if (!url) throw new Error("url is required");
|
|
1208
|
-
const result = await session.open(cid, url);
|
|
1303
|
+
const proxy = input.proxy === true;
|
|
1304
|
+
const result = await session.open(cid, url, { proxy });
|
|
1209
1305
|
session.startScreencast(cid).catch((err) => {
|
|
1210
1306
|
console.error("[poncho][browser] startScreencast failed:", err?.message ?? err);
|
|
1211
1307
|
});
|
package/package.json
CHANGED
package/src/session.ts
CHANGED
|
@@ -62,6 +62,64 @@ async function getBrowserManagerCtor(): Promise<new () => BrowserManagerInstance
|
|
|
62
62
|
|
|
63
63
|
const MAX_TABS = 8;
|
|
64
64
|
|
|
65
|
+
/**
|
|
66
|
+
* Sites that hard-block datacenter IPs (a 403 / "blocked by network security"
|
|
67
|
+
* before any fingerprint check). Navigating to one auto-enables residential
|
|
68
|
+
* proxies. Matched on the registrable-ish suffix so subdomains
|
|
69
|
+
* (old.reddit.com, www.linkedin.com) are covered. The agent can also force
|
|
70
|
+
* proxies on for anything else via `browser_open`'s `proxy` param.
|
|
71
|
+
*/
|
|
72
|
+
const PROXY_DOMAINS = [
|
|
73
|
+
"reddit.com",
|
|
74
|
+
"linkedin.com",
|
|
75
|
+
"instagram.com",
|
|
76
|
+
"facebook.com",
|
|
77
|
+
"x.com",
|
|
78
|
+
"twitter.com",
|
|
79
|
+
"tiktok.com",
|
|
80
|
+
"quora.com",
|
|
81
|
+
"pinterest.com",
|
|
82
|
+
];
|
|
83
|
+
|
|
84
|
+
/** Whether a URL's host is (a subdomain of) a known IP-blocking domain. */
|
|
85
|
+
function shouldProxyFor(url: string): boolean {
|
|
86
|
+
let host: string;
|
|
87
|
+
try { host = new URL(url).hostname.toLowerCase(); }
|
|
88
|
+
catch { return false; }
|
|
89
|
+
return PROXY_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Create a Browserbase session with residential proxies enabled and return its
|
|
94
|
+
* CDP `connectUrl`. Used instead of agent-browser's built-in browserbase path,
|
|
95
|
+
* which hardcodes the create body to `{ projectId }` and so can't turn proxies
|
|
96
|
+
* on. Reads the same `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` env vars
|
|
97
|
+
* agent-browser does.
|
|
98
|
+
*/
|
|
99
|
+
async function createBrowserbaseProxiedSession(): Promise<string> {
|
|
100
|
+
const apiKey = process.env.BROWSERBASE_API_KEY;
|
|
101
|
+
const projectId = process.env.BROWSERBASE_PROJECT_ID;
|
|
102
|
+
if (!apiKey || !projectId) {
|
|
103
|
+
throw new Error(
|
|
104
|
+
"BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID are required when using browserbase",
|
|
105
|
+
);
|
|
106
|
+
}
|
|
107
|
+
const res = await fetch("https://api.browserbase.com/v1/sessions", {
|
|
108
|
+
method: "POST",
|
|
109
|
+
headers: { "Content-Type": "application/json", "X-BB-API-Key": apiKey },
|
|
110
|
+
body: JSON.stringify({ projectId, proxies: true }),
|
|
111
|
+
});
|
|
112
|
+
if (!res.ok) {
|
|
113
|
+
const detail = await res.text().catch(() => "");
|
|
114
|
+
throw new Error(`Failed to create Browserbase session: ${res.status} ${detail}`);
|
|
115
|
+
}
|
|
116
|
+
const session = (await res.json()) as { connectUrl?: string };
|
|
117
|
+
if (!session.connectUrl) {
|
|
118
|
+
throw new Error("Browserbase session response missing connectUrl");
|
|
119
|
+
}
|
|
120
|
+
return session.connectUrl;
|
|
121
|
+
}
|
|
122
|
+
|
|
65
123
|
const VALID_SAME_SITE = ["Strict", "Lax", "None"];
|
|
66
124
|
|
|
67
125
|
/**
|
|
@@ -200,6 +258,15 @@ export class BrowserSession {
|
|
|
200
258
|
// Currently screencast conversation (only one at a time due to CDP)
|
|
201
259
|
private _screencastConversation: string | undefined;
|
|
202
260
|
|
|
261
|
+
// Residential-proxy mode. `launchedProxyMode` is what the currently-launched
|
|
262
|
+
// remote session was created with; `proxyEnabled` is the mode the NEXT launch
|
|
263
|
+
// should use. They diverge when a navigation asks for a different mode (a
|
|
264
|
+
// hard-domain gate hit, or an explicit `proxy` on browser_open) — proxies are
|
|
265
|
+
// fixed at Browserbase-session creation, so switching means recreating the
|
|
266
|
+
// session (see ensureProxyMode). Only meaningful for the browserbase provider.
|
|
267
|
+
private launchedProxyMode = false;
|
|
268
|
+
private proxyEnabled = false;
|
|
269
|
+
|
|
203
270
|
constructor(sessionId: string, config: BrowserConfig = {}) {
|
|
204
271
|
this.sessionId = sessionId;
|
|
205
272
|
this.config = config;
|
|
@@ -379,6 +446,16 @@ export class BrowserSession {
|
|
|
379
446
|
if (this.config.cdpUrl) {
|
|
380
447
|
launchOpts.cdpUrl = this.config.cdpUrl;
|
|
381
448
|
console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
|
|
449
|
+
} else if (this.config.provider === "browserbase" && this.proxyEnabled) {
|
|
450
|
+
// agent-browser's browserbase path creates the session with only
|
|
451
|
+
// { projectId } — no proxy option — so it always lands on a datacenter IP
|
|
452
|
+
// that sites like Reddit block with a 403 before any fingerprint check.
|
|
453
|
+
// Create the session ourselves with residential proxies enabled and hand
|
|
454
|
+
// agent-browser the connectUrl via its cdpUrl path, which bypasses
|
|
455
|
+
// connectToBrowserbase. Stealth, cookie restore, and the screencast are
|
|
456
|
+
// applied below on the connected context, unchanged.
|
|
457
|
+
launchOpts.cdpUrl = await createBrowserbaseProxiedSession();
|
|
458
|
+
console.log("[poncho][browser] Using cloud provider: browserbase (residential proxies)");
|
|
382
459
|
} else if (this.config.provider) {
|
|
383
460
|
launchOpts.provider = this.config.provider;
|
|
384
461
|
console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
|
|
@@ -414,6 +491,9 @@ export class BrowserSession {
|
|
|
414
491
|
}
|
|
415
492
|
|
|
416
493
|
await mgr.launch(launchOpts as Parameters<BrowserManagerInstance["launch"]>[0]);
|
|
494
|
+
// Record the proxy mode this session was actually launched with, so
|
|
495
|
+
// ensureProxyMode knows whether a later navigation needs a recreate.
|
|
496
|
+
this.launchedProxyMode = this.config.provider === "browserbase" && this.proxyEnabled;
|
|
417
497
|
|
|
418
498
|
// Remote browsers (cloud provider / cdpUrl) ignore launchOpts.viewport —
|
|
419
499
|
// that's only applied when launching a local context — so the page renders
|
|
@@ -565,9 +645,14 @@ export class BrowserSession {
|
|
|
565
645
|
// Browser operations (all scoped by conversationId)
|
|
566
646
|
// -----------------------------------------------------------------------
|
|
567
647
|
|
|
568
|
-
async open(conversationId: string, url: string): Promise<{ title?: string }> {
|
|
648
|
+
async open(
|
|
649
|
+
conversationId: string,
|
|
650
|
+
url: string,
|
|
651
|
+
opts?: { proxy?: boolean },
|
|
652
|
+
): Promise<{ title?: string }> {
|
|
569
653
|
await this.lock();
|
|
570
654
|
try {
|
|
655
|
+
await this.ensureProxyMode(url, opts?.proxy);
|
|
571
656
|
return await this._doOpen(conversationId, url);
|
|
572
657
|
} catch (err: unknown) {
|
|
573
658
|
const msg = (err as Error)?.message ?? "";
|
|
@@ -588,6 +673,33 @@ export class BrowserSession {
|
|
|
588
673
|
}
|
|
589
674
|
}
|
|
590
675
|
|
|
676
|
+
/**
|
|
677
|
+
* Reconcile the residential-proxy mode before a navigation. The desired mode
|
|
678
|
+
* is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
|
|
679
|
+
* URL hitting a known IP-blocking domain OR the config default. Since proxies
|
|
680
|
+
* are fixed at Browserbase-session creation, a change tears the live session
|
|
681
|
+
* down so the next ensureManager relaunches proxied. Cookies/localStorage are
|
|
682
|
+
* persisted first and restored on relaunch, so login state survives; open
|
|
683
|
+
* tabs in other conversations of the same session are lost (rare, and only
|
|
684
|
+
* when the mode actually flips). No-op unless the provider is browserbase.
|
|
685
|
+
*/
|
|
686
|
+
private async ensureProxyMode(url: string, requested?: boolean): Promise<void> {
|
|
687
|
+
if (this.config.provider !== "browserbase") return;
|
|
688
|
+
const want =
|
|
689
|
+
requested === true || shouldProxyFor(url) || (this.config.proxies ?? false);
|
|
690
|
+
this.proxyEnabled = want;
|
|
691
|
+
if (!this.manager || want === this.launchedProxyMode) return;
|
|
692
|
+
console.log(`[poncho][browser] Switching proxy mode -> ${want}; recreating session`);
|
|
693
|
+
try { await this.persistStorageState(); } catch { /* best-effort */ }
|
|
694
|
+
try { await this.manager.close(); } catch { /* */ }
|
|
695
|
+
this.manager = undefined;
|
|
696
|
+
this._contextStealthInstalled = false;
|
|
697
|
+
this._uaOverrideApplied.clear();
|
|
698
|
+
for (const [, t] of this.tabs) {
|
|
699
|
+
if (t.tabIndex >= 0) { t.tabIndex = -1; t.active = false; t.url = undefined; }
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
|
|
591
703
|
private async _doOpen(conversationId: string, url: string): Promise<{ title?: string }> {
|
|
592
704
|
const mgr = await this.ensureManager();
|
|
593
705
|
const tab = await this.switchToConversation(mgr, conversationId);
|
package/src/tools.ts
CHANGED
|
@@ -14,6 +14,7 @@ export function createBrowserTools(
|
|
|
14
14
|
"This is a HEAVY, last-resort tool — prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. " +
|
|
15
15
|
"Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). " +
|
|
16
16
|
"When a site needs credentials, navigate to its login page and let the user sign in directly in the live view — never ask for passwords in chat. " +
|
|
17
|
+
"If a page returns a 403 / 'blocked by network security' / 'access denied' (common on Reddit, LinkedIn, Instagram, and similar), retry with `proxy: true` to route through a residential IP. " +
|
|
17
18
|
"To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
|
|
18
19
|
inputSchema: {
|
|
19
20
|
type: "object",
|
|
@@ -22,6 +23,11 @@ export function createBrowserTools(
|
|
|
22
23
|
type: "string",
|
|
23
24
|
description: "The URL to navigate to (must include protocol, e.g. https://)",
|
|
24
25
|
},
|
|
26
|
+
proxy: {
|
|
27
|
+
type: "boolean",
|
|
28
|
+
description:
|
|
29
|
+
"Route this session through a residential proxy instead of a datacenter IP. Known IP-blocking sites (Reddit, LinkedIn, …) use this automatically; set it explicitly to retry a site that blocked you with a 403/'blocked' page. Slower to start (the session is recreated) and costs proxy bandwidth, so only use it when a normal open was blocked.",
|
|
30
|
+
},
|
|
25
31
|
},
|
|
26
32
|
required: ["url"],
|
|
27
33
|
},
|
|
@@ -30,7 +36,8 @@ export function createBrowserTools(
|
|
|
30
36
|
const cid = context.conversationId ?? "__default__";
|
|
31
37
|
const url = String(input.url ?? "");
|
|
32
38
|
if (!url) throw new Error("url is required");
|
|
33
|
-
const result = await session.open(cid, url);
|
|
39
|
+
const proxy = input.proxy === true;
|
|
40
|
+
const result = await session.open(cid, url, { proxy });
|
|
34
41
|
session.startScreencast(cid).catch((err) => {
|
|
35
42
|
console.error("[poncho][browser] startScreencast failed:", err?.message ?? err);
|
|
36
43
|
});
|
package/src/types.ts
CHANGED
|
@@ -77,4 +77,10 @@ export interface BrowserConfig {
|
|
|
77
77
|
/** Connect to an existing browser via Chrome DevTools Protocol URL or port.
|
|
78
78
|
* Mutually exclusive with `provider`. */
|
|
79
79
|
cdpUrl?: string;
|
|
80
|
+
/** Default residential-proxy mode for Browserbase sessions. Datacenter IPs are
|
|
81
|
+
* hard-blocked (403) by IP-reputation walls (Reddit, LinkedIn, …) before any
|
|
82
|
+
* fingerprint check. Known such domains are always proxied automatically, and
|
|
83
|
+
* `browser_open` can force it per-navigation; set this `true` to proxy every
|
|
84
|
+
* session by default. Billed per GB by Browserbase. Browserbase provider only. */
|
|
85
|
+
proxies?: boolean;
|
|
80
86
|
}
|