@poncho-ai/browser 0.6.26 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +33 -0
- package/dist/index.d.ts +37 -1
- package/dist/index.js +211 -3
- package/package.json +1 -1
- package/src/session.ts +196 -1
- package/src/tools.ts +57 -1
- package/src/types.ts +6 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/browser@0.6.26 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
|
|
2
|
+
> @poncho-ai/browser@0.7.1 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
|
|
3
3
|
> tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
[34mCLI[39m tsup v8.5.1
|
|
8
8
|
[34mCLI[39m Target: es2022
|
|
9
9
|
[34mESM[39m Build start
|
|
10
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
11
|
-
[32mESM[39m ⚡️ Build success in
|
|
10
|
+
[32mESM[39m [1mdist/index.js [22m[32m57.26 KB[39m
|
|
11
|
+
[32mESM[39m ⚡️ Build success in 77ms
|
|
12
12
|
[34mDTS[39m Build start
|
|
13
|
-
[32mDTS[39m ⚡️ Build success in
|
|
14
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[
|
|
13
|
+
[32mDTS[39m ⚡️ Build success in 5467ms
|
|
14
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m15.75 KB[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
# @poncho-ai/browser
|
|
2
2
|
|
|
3
|
+
## 0.7.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [#188](https://github.com/cesr/poncho-ai/pull/188) [`97772cc`](https://github.com/cesr/poncho-ai/commit/97772ccf2c07ec3a3f3350ef3a65596fba91a154) Thanks [@cesr](https://github.com/cesr)! - Add residential-proxy support for Browserbase sessions so IP-reputation walls
|
|
8
|
+
(Reddit, LinkedIn, Instagram, …) stop returning 403 "blocked by network
|
|
9
|
+
security". Datacenter IPs are blocked before any fingerprint check, so stealth
|
|
10
|
+
alone can't get past them.
|
|
11
|
+
- Known IP-blocking domains are proxied automatically (domain gate).
|
|
12
|
+
- `browser_open` gains a `proxy` param so the agent can retry any other site
|
|
13
|
+
that blocked it through a residential IP.
|
|
14
|
+
- `BrowserConfig.proxies` sets the default mode for every session.
|
|
15
|
+
|
|
16
|
+
Because proxies are fixed at Browserbase-session creation (and Vercel's
|
|
17
|
+
agent-browser hardcodes the create body to `{ projectId }`), we create the
|
|
18
|
+
Browserbase session ourselves with `proxies: true` and connect agent-browser to
|
|
19
|
+
it via its `cdpUrl` path. Switching proxy mode mid-conversation recreates the
|
|
20
|
+
session; cookies/localStorage are persisted and restored across the recreate,
|
|
21
|
+
so login state survives.
|
|
22
|
+
|
|
23
|
+
## 0.7.0
|
|
24
|
+
|
|
25
|
+
### Minor Changes
|
|
26
|
+
|
|
27
|
+
- [#184](https://github.com/cesr/poncho-ai/pull/184) [`12ce2be`](https://github.com/cesr/poncho-ai/commit/12ce2be01c9d98b1d9aa634d4d8051c4c0094a44) Thanks [@cesr](https://github.com/cesr)! - Add `browser_download` so the agent can save files from the browser into the
|
|
28
|
+
VFS. The tool fetches a file using the page's logged-in session (so it works
|
|
29
|
+
for files behind a login) and writes the bytes straight to the tenant's VFS via
|
|
30
|
+
`ToolContext.vfs` — never through the model. `url` defaults to the current page,
|
|
31
|
+
or pass a same-origin link's href. The fetch runs inside the page (`evaluate`),
|
|
32
|
+
so it works identically for local and remote/cloud browsers (bytes return over
|
|
33
|
+
CDP). Capped at 25 MB. The harness browser system prompt now documents it under
|
|
34
|
+
a "Saving files" section.
|
|
35
|
+
|
|
3
36
|
## 0.6.26
|
|
4
37
|
|
|
5
38
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -71,6 +71,12 @@ interface BrowserConfig {
|
|
|
71
71
|
/** Connect to an existing browser via Chrome DevTools Protocol URL or port.
|
|
72
72
|
* Mutually exclusive with `provider`. */
|
|
73
73
|
cdpUrl?: string;
|
|
74
|
+
/** Default residential-proxy mode for Browserbase sessions. Datacenter IPs are
|
|
75
|
+
* hard-blocked (403) by IP-reputation walls (Reddit, LinkedIn, …) before any
|
|
76
|
+
* fingerprint check. Known such domains are always proxied automatically, and
|
|
77
|
+
* `browser_open` can force it per-navigation; set this `true` to proxy every
|
|
78
|
+
* session by default. Billed per GB by Browserbase. Browserbase provider only. */
|
|
79
|
+
proxies?: boolean;
|
|
74
80
|
}
|
|
75
81
|
|
|
76
82
|
type FrameListener = (frame: BrowserFrame) => void;
|
|
@@ -87,6 +93,8 @@ declare class BrowserSession {
|
|
|
87
93
|
private _lockQueue;
|
|
88
94
|
private _locked;
|
|
89
95
|
private _screencastConversation;
|
|
96
|
+
private launchedProxyMode;
|
|
97
|
+
private proxyEnabled;
|
|
90
98
|
constructor(sessionId: string, config?: BrowserConfig);
|
|
91
99
|
get profileDir(): string;
|
|
92
100
|
private lock;
|
|
@@ -131,9 +139,22 @@ declare class BrowserSession {
|
|
|
131
139
|
getUrl(conversationId: string): string | undefined;
|
|
132
140
|
/** Whether the browser has been launched. */
|
|
133
141
|
get isLaunched(): boolean;
|
|
134
|
-
open(conversationId: string, url: string): Promise<{
|
|
142
|
+
open(conversationId: string, url: string, opts?: {
|
|
143
|
+
proxy?: boolean;
|
|
144
|
+
}): Promise<{
|
|
135
145
|
title?: string;
|
|
136
146
|
}>;
|
|
147
|
+
/**
|
|
148
|
+
* Reconcile the residential-proxy mode before a navigation. The desired mode
|
|
149
|
+
* is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
|
|
150
|
+
* URL hitting a known IP-blocking domain OR the config default. Since proxies
|
|
151
|
+
* are fixed at Browserbase-session creation, a change tears the live session
|
|
152
|
+
* down so the next ensureManager relaunches proxied. Cookies/localStorage are
|
|
153
|
+
* persisted first and restored on relaunch, so login state survives; open
|
|
154
|
+
* tabs in other conversations of the same session are lost (rare, and only
|
|
155
|
+
* when the mode actually flips). No-op unless the provider is browserbase.
|
|
156
|
+
*/
|
|
157
|
+
private ensureProxyMode;
|
|
137
158
|
private _doOpen;
|
|
138
159
|
snapshot(conversationId: string): Promise<string>;
|
|
139
160
|
click(conversationId: string, ref: string): Promise<void>;
|
|
@@ -144,6 +165,21 @@ declare class BrowserSession {
|
|
|
144
165
|
url: string;
|
|
145
166
|
title: string;
|
|
146
167
|
}>;
|
|
168
|
+
/**
|
|
169
|
+
* Fetch a file using the page's own (logged-in) session and return its
|
|
170
|
+
* bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
|
|
171
|
+
* current page. The fetch runs INSIDE the page via `evaluate`, so it carries
|
|
172
|
+
* the site's cookies and works the same whether the browser is local or a
|
|
173
|
+
* remote/cloud provider (the bytes come back over CDP). Because it's a page
|
|
174
|
+
* `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
|
|
175
|
+
* site doesn't allow CORS for will fail — navigate to the file first (so it's
|
|
176
|
+
* same-origin) or pass its direct URL while on that site.
|
|
177
|
+
*/
|
|
178
|
+
download(conversationId: string, url?: string): Promise<{
|
|
179
|
+
data: Buffer;
|
|
180
|
+
contentType: string;
|
|
181
|
+
filename: string;
|
|
182
|
+
}>;
|
|
147
183
|
scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void>;
|
|
148
184
|
clickText(conversationId: string, text: string, exact?: boolean): Promise<void>;
|
|
149
185
|
executeJs(conversationId: string, script: string): Promise<unknown>;
|
package/dist/index.js
CHANGED
|
@@ -186,6 +186,49 @@ async function getBrowserManagerCtor() {
|
|
|
186
186
|
return BrowserManagerCtor;
|
|
187
187
|
}
|
|
188
188
|
var MAX_TABS = 8;
|
|
189
|
+
// Domains whose hosts (and subdomains) are always routed through residential
// proxies; see shouldProxyFor.
var PROXY_DOMAINS = [
  "reddit.com",
  "linkedin.com",
  "instagram.com",
  "facebook.com",
  "x.com",
  "twitter.com",
  "tiktok.com",
  "quora.com",
  "pinterest.com"
];
/**
 * Whether a URL's host is one of PROXY_DOMAINS or a subdomain of one.
 *
 * @param url Absolute URL to test.
 * @returns true when the host matches; false otherwise, including when `url`
 *   cannot be parsed by `new URL`.
 */
function shouldProxyFor(url) {
  let host;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    return false;
  }
  // URL.hostname keeps the trailing dot of a fully-qualified name
  // ("www.reddit.com."); strip it so the FQDN form hits the gate too.
  host = host.replace(/\.$/, "");
  return PROXY_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
}
|
|
209
|
+
/**
 * Create a Browserbase session with `proxies: true` and return its
 * `connectUrl`.
 *
 * Reads `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID` from the
 * environment.
 *
 * @returns The `connectUrl` string from the create-session response.
 * @throws Error when either env var is missing, the API responds with a
 *   non-2xx status, or the response carries no usable `connectUrl`. A request
 *   that exceeds the timeout rejects with the fetch abort error.
 */
async function createBrowserbaseProxiedSession() {
  const apiKey = process.env.BROWSERBASE_API_KEY;
  const projectId = process.env.BROWSERBASE_PROJECT_ID;
  if (!apiKey || !projectId) {
    throw new Error(
      "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID are required when using browserbase"
    );
  }
  const res = await fetch("https://api.browserbase.com/v1/sessions", {
    method: "POST",
    headers: { "Content-Type": "application/json", "X-BB-API-Key": apiKey },
    body: JSON.stringify({ projectId, proxies: true }),
    // Bound the request: a stalled API call would otherwise never settle and
    // keep whatever is awaiting the launch blocked indefinitely.
    signal: AbortSignal.timeout(60000)
  });
  if (!res.ok) {
    const detail = await res.text().catch(() => "");
    throw new Error(`Failed to create Browserbase session: ${res.status} ${detail}`);
  }
  const session = await res.json();
  const connectUrl = session && typeof session === "object" ? session.connectUrl : void 0;
  // Reject non-string values too: the result is used verbatim as a CDP URL.
  if (typeof connectUrl !== "string" || !connectUrl) {
    throw new Error("Browserbase session response missing connectUrl");
  }
  return connectUrl;
}
|
|
189
232
|
var VALID_SAME_SITE = ["Strict", "Lax", "None"];
|
|
190
233
|
function sanitizeCookieForCDP(c) {
|
|
191
234
|
const name = typeof c.name === "string" ? c.name : "";
|
|
@@ -244,6 +287,28 @@ var SAME_TAB_INIT_SCRIPT = `
|
|
|
244
287
|
} catch {}
|
|
245
288
|
})();
|
|
246
289
|
`;
|
|
290
|
+
/**
 * Make a derived filename safe to append to a folder path: trims it, replaces
 * `/` and `\` with `_`, and removes NUL characters.
 *
 * @param name Candidate filename.
 * @returns The cleaned name, or "download" when nothing usable remains.
 */
function sanitizeName(name) {
  const cleaned = name.trim().replace(/[/\\]/g, "_").replace(/\0/g, "");
  // "." and ".." contain no separators but still resolve to the folder itself
  // or its parent once appended to a directory path.
  if (cleaned === "" || cleaned === "." || cleaned === "..") return "download";
  return cleaned;
}
/**
 * Derive a filename from a Content-Disposition header, falling back to the
 * URL's last path segment, then to "download".
 *
 * Order of preference: `filename*=` (extended, percent-encoded), a
 * double-quoted `filename="..."`, an unquoted `filename=...`, the URL segment.
 *
 * @param disposition Raw Content-Disposition header value ("" when absent).
 * @param url URL the file was fetched from.
 * @returns A sanitized, non-empty filename.
 */
function filenameFromDownload(disposition, url) {
  // Percent-decode, keeping the raw text when the escapes are malformed.
  const decode = (s) => {
    try {
      return decodeURIComponent(s);
    } catch {
      return s;
    }
  };
  // Extended form is charset'language'value; skip the whole prefix so a
  // language tag or a non-UTF-8 charset is not mistaken for the name.
  const star = /filename\*\s*=\s*(?:[^'";=\s]*'[^'";]*')?["']?([^"';]+)/i.exec(disposition);
  if (star?.[1]) return sanitizeName(decode(star[1]));
  // A double-quoted value may legitimately contain ';' or '\''.
  const quoted = /filename\s*=\s*"([^"]+)"/i.exec(disposition);
  if (quoted?.[1]) return sanitizeName(quoted[1]);
  const plain = /filename\s*=\s*["']?([^"';]+)/i.exec(disposition);
  if (plain?.[1]) return sanitizeName(plain[1]);
  try {
    const base = new URL(url).pathname.split("/").filter(Boolean).pop();
    if (base) return sanitizeName(decode(base));
  } catch {
    // not a parseable URL
  }
  return "download";
}
|
|
247
312
|
var BrowserSession = class {
|
|
248
313
|
config;
|
|
249
314
|
sessionId;
|
|
@@ -268,6 +333,14 @@ var BrowserSession = class {
|
|
|
268
333
|
_locked = false;
|
|
269
334
|
// Currently screencast conversation (only one at a time due to CDP)
|
|
270
335
|
_screencastConversation;
|
|
336
|
+
// Residential-proxy mode. `launchedProxyMode` is what the currently-launched
|
|
337
|
+
// remote session was created with; `proxyEnabled` is the mode the NEXT launch
|
|
338
|
+
// should use. They diverge when a navigation asks for a different mode (a
|
|
339
|
+
// hard-domain gate hit, or an explicit `proxy` on browser_open) — proxies are
|
|
340
|
+
// fixed at Browserbase-session creation, so switching means recreating the
|
|
341
|
+
// session (see ensureProxyMode). Only meaningful for the browserbase provider.
|
|
342
|
+
launchedProxyMode = false;
|
|
343
|
+
proxyEnabled = false;
|
|
271
344
|
constructor(sessionId, config = {}) {
|
|
272
345
|
this.sessionId = sessionId;
|
|
273
346
|
this.config = config;
|
|
@@ -420,6 +493,9 @@ var BrowserSession = class {
|
|
|
420
493
|
if (this.config.cdpUrl) {
|
|
421
494
|
launchOpts.cdpUrl = this.config.cdpUrl;
|
|
422
495
|
console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
|
|
496
|
+
} else if (this.config.provider === "browserbase" && this.proxyEnabled) {
|
|
497
|
+
launchOpts.cdpUrl = await createBrowserbaseProxiedSession();
|
|
498
|
+
console.log("[poncho][browser] Using cloud provider: browserbase (residential proxies)");
|
|
423
499
|
} else if (this.config.provider) {
|
|
424
500
|
launchOpts.provider = this.config.provider;
|
|
425
501
|
console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
|
|
@@ -448,6 +524,7 @@ var BrowserSession = class {
|
|
|
448
524
|
launchOpts.args = baseArgs;
|
|
449
525
|
}
|
|
450
526
|
await mgr.launch(launchOpts);
|
|
527
|
+
this.launchedProxyMode = this.config.provider === "browserbase" && this.proxyEnabled;
|
|
451
528
|
if (this.isRemote) {
|
|
452
529
|
try {
|
|
453
530
|
await mgr.setViewport(viewport.width ?? 1280, viewport.height ?? 720);
|
|
@@ -587,9 +664,10 @@ var BrowserSession = class {
|
|
|
587
664
|
// -----------------------------------------------------------------------
|
|
588
665
|
// Browser operations (all scoped by conversationId)
|
|
589
666
|
// -----------------------------------------------------------------------
|
|
590
|
-
async open(conversationId, url) {
|
|
667
|
+
async open(conversationId, url, opts) {
|
|
591
668
|
await this.lock();
|
|
592
669
|
try {
|
|
670
|
+
await this.ensureProxyMode(url, opts?.proxy);
|
|
593
671
|
return await this._doOpen(conversationId, url);
|
|
594
672
|
} catch (err) {
|
|
595
673
|
const msg = err?.message ?? "";
|
|
@@ -616,6 +694,41 @@ var BrowserSession = class {
|
|
|
616
694
|
this.unlock();
|
|
617
695
|
}
|
|
618
696
|
}
|
|
697
|
+
/**
|
|
698
|
+
* Reconcile the residential-proxy mode before a navigation. The desired mode
|
|
699
|
+
* is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
|
|
700
|
+
* URL hitting a known IP-blocking domain OR the config default. Since proxies
|
|
701
|
+
* are fixed at Browserbase-session creation, a change tears the live session
|
|
702
|
+
* down so the next ensureManager relaunches proxied. Cookies/localStorage are
|
|
703
|
+
* persisted first and restored on relaunch, so login state survives; open
|
|
704
|
+
* tabs in other conversations of the same session are lost (rare, and only
|
|
705
|
+
* when the mode actually flips). No-op unless the provider is browserbase.
|
|
706
|
+
*/
|
|
707
|
+
async ensureProxyMode(url, requested) {
|
|
708
|
+
if (this.config.provider !== "browserbase") return;
|
|
709
|
+
const want = requested === true || shouldProxyFor(url) || (this.config.proxies ?? false);
|
|
710
|
+
this.proxyEnabled = want;
|
|
711
|
+
if (!this.manager || want === this.launchedProxyMode) return;
|
|
712
|
+
console.log(`[poncho][browser] Switching proxy mode -> ${want}; recreating session`);
|
|
713
|
+
try {
|
|
714
|
+
await this.persistStorageState();
|
|
715
|
+
} catch {
|
|
716
|
+
}
|
|
717
|
+
try {
|
|
718
|
+
await this.manager.close();
|
|
719
|
+
} catch {
|
|
720
|
+
}
|
|
721
|
+
this.manager = void 0;
|
|
722
|
+
this._contextStealthInstalled = false;
|
|
723
|
+
this._uaOverrideApplied.clear();
|
|
724
|
+
for (const [, t] of this.tabs) {
|
|
725
|
+
if (t.tabIndex >= 0) {
|
|
726
|
+
t.tabIndex = -1;
|
|
727
|
+
t.active = false;
|
|
728
|
+
t.url = void 0;
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
}
|
|
619
732
|
async _doOpen(conversationId, url) {
|
|
620
733
|
const mgr = await this.ensureManager();
|
|
621
734
|
const tab = await this.switchToConversation(mgr, conversationId);
|
|
@@ -693,6 +806,55 @@ var BrowserSession = class {
|
|
|
693
806
|
this.unlock();
|
|
694
807
|
}
|
|
695
808
|
}
|
|
809
|
+
/**
|
|
810
|
+
* Fetch a file using the page's own (logged-in) session and return its
|
|
811
|
+
* bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
|
|
812
|
+
* current page. The fetch runs INSIDE the page via `evaluate`, so it carries
|
|
813
|
+
* the site's cookies and works the same whether the browser is local or a
|
|
814
|
+
* remote/cloud provider (the bytes come back over CDP). Because it's a page
|
|
815
|
+
* `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
|
|
816
|
+
* site doesn't allow CORS for will fail — navigate to the file first (so it's
|
|
817
|
+
* same-origin) or pass its direct URL while on that site.
|
|
818
|
+
*/
|
|
819
|
+
async download(conversationId, url) {
|
|
820
|
+
await this.lock();
|
|
821
|
+
try {
|
|
822
|
+
const mgr = await this.ensureManager();
|
|
823
|
+
await this.switchToConversation(mgr, conversationId);
|
|
824
|
+
const page = mgr.getPage();
|
|
825
|
+
const target = url && url.trim() ? url.trim() : page.url();
|
|
826
|
+
if (!target || target === "about:blank") {
|
|
827
|
+
throw new Error("no URL to download (open the file's page first, or pass a url)");
|
|
828
|
+
}
|
|
829
|
+
const MAX_BYTES = 25 * 1024 * 1024;
|
|
830
|
+
const expr = `(async () => {
|
|
831
|
+
const res = await fetch(${JSON.stringify(target)}, { credentials: "include" });
|
|
832
|
+
if (!res.ok) throw new Error("HTTP " + res.status + " " + res.statusText);
|
|
833
|
+
const buf = new Uint8Array(await res.arrayBuffer());
|
|
834
|
+
if (buf.length > ${MAX_BYTES}) throw new Error("file too large: " + buf.length + " bytes (max ${MAX_BYTES})");
|
|
835
|
+
let bin = "";
|
|
836
|
+
const CH = 0x8000;
|
|
837
|
+
for (let i = 0; i < buf.length; i += CH) {
|
|
838
|
+
bin += String.fromCharCode.apply(null, buf.subarray(i, i + CH));
|
|
839
|
+
}
|
|
840
|
+
return {
|
|
841
|
+
base64: btoa(bin),
|
|
842
|
+
contentType: res.headers.get("content-type") || "",
|
|
843
|
+
disposition: res.headers.get("content-disposition") || "",
|
|
844
|
+
finalUrl: res.url || ${JSON.stringify(target)},
|
|
845
|
+
};
|
|
846
|
+
})()`;
|
|
847
|
+
const r = await page.evaluate(expr);
|
|
848
|
+
const data = Buffer.from(r.base64, "base64");
|
|
849
|
+
return {
|
|
850
|
+
data,
|
|
851
|
+
contentType: r.contentType,
|
|
852
|
+
filename: filenameFromDownload(r.disposition, r.finalUrl)
|
|
853
|
+
};
|
|
854
|
+
} finally {
|
|
855
|
+
this.unlock();
|
|
856
|
+
}
|
|
857
|
+
}
|
|
696
858
|
async scroll(conversationId, direction, amount) {
|
|
697
859
|
await this.lock();
|
|
698
860
|
try {
|
|
@@ -1118,13 +1280,17 @@ function createBrowserTools(getSession) {
|
|
|
1118
1280
|
return [
|
|
1119
1281
|
{
|
|
1120
1282
|
name: "browser_open",
|
|
1121
|
-
description: "Open a URL in a real browser whose live view the user can watch and interact with. Returns the page title. This is a HEAVY, last-resort tool \u2014 prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). When a site needs credentials, navigate to its login page and let the user sign in directly in the live view \u2014 never ask for passwords in chat. To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
|
|
1283
|
+
description: "Open a URL in a real browser whose live view the user can watch and interact with. Returns the page title. This is a HEAVY, last-resort tool \u2014 prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). When a site needs credentials, navigate to its login page and let the user sign in directly in the live view \u2014 never ask for passwords in chat. If a page returns a 403 / 'blocked by network security' / 'access denied' (common on Reddit, LinkedIn, Instagram, and similar), retry with `proxy: true` to route through a residential IP. To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
|
|
1122
1284
|
inputSchema: {
|
|
1123
1285
|
type: "object",
|
|
1124
1286
|
properties: {
|
|
1125
1287
|
url: {
|
|
1126
1288
|
type: "string",
|
|
1127
1289
|
description: "The URL to navigate to (must include protocol, e.g. https://)"
|
|
1290
|
+
},
|
|
1291
|
+
proxy: {
|
|
1292
|
+
type: "boolean",
|
|
1293
|
+
description: "Route this session through a residential proxy instead of a datacenter IP. Known IP-blocking sites (Reddit, LinkedIn, \u2026) use this automatically; set it explicitly to retry a site that blocked you with a 403/'blocked' page. Slower to start (the session is recreated) and costs proxy bandwidth, so only use it when a normal open was blocked."
|
|
1128
1294
|
}
|
|
1129
1295
|
},
|
|
1130
1296
|
required: ["url"]
|
|
@@ -1134,7 +1300,8 @@ function createBrowserTools(getSession) {
|
|
|
1134
1300
|
const cid = context.conversationId ?? "__default__";
|
|
1135
1301
|
const url = String(input.url ?? "");
|
|
1136
1302
|
if (!url) throw new Error("url is required");
|
|
1137
|
-
const result = await session.open(cid, url);
|
|
1303
|
+
const proxy = input.proxy === true;
|
|
1304
|
+
const result = await session.open(cid, url, { proxy });
|
|
1138
1305
|
session.startScreencast(cid).catch((err) => {
|
|
1139
1306
|
console.error("[poncho][browser] startScreencast failed:", err?.message ?? err);
|
|
1140
1307
|
});
|
|
@@ -1261,6 +1428,47 @@ function createBrowserTools(getSession) {
|
|
|
1261
1428
|
return { url: result.url, title: result.title, text: result.text };
|
|
1262
1429
|
}
|
|
1263
1430
|
},
|
|
1431
|
+
{
|
|
1432
|
+
name: "browser_download",
|
|
1433
|
+
description: "Download a file from the browser and save it into the user's virtual filesystem (VFS). Fetches the file using the browser's logged-in session, so it works for files behind a login \u2014 use it to keep a PDF, CSV, image, or other file the page offers. It fetches `url` (or the current page if you omit it), so for a download link on the page, grab its href from a snapshot first; for a file that opens in the browser, navigate to it and call this with no url. The fetch runs in the page, so the url should be same-origin with the current page (navigate to the file's site first if needed). Returns the saved VFS path and byte size \u2014 the bytes go straight to the VFS, not through the chat.",
|
|
1434
|
+
inputSchema: {
|
|
1435
|
+
type: "object",
|
|
1436
|
+
properties: {
|
|
1437
|
+
path: {
|
|
1438
|
+
type: "string",
|
|
1439
|
+
description: "Destination in the VFS. Include a filename (e.g. /downloads/report.pdf); parent folders are created as needed. End with '/' (e.g. /downloads/) to keep the file's own name."
|
|
1440
|
+
},
|
|
1441
|
+
url: {
|
|
1442
|
+
type: "string",
|
|
1443
|
+
description: "URL of the file to download. Optional \u2014 defaults to the current page's URL."
|
|
1444
|
+
}
|
|
1445
|
+
},
|
|
1446
|
+
required: ["path"]
|
|
1447
|
+
},
|
|
1448
|
+
handler: async (input, context) => {
|
|
1449
|
+
const session = getSession();
|
|
1450
|
+
const vfs = context.vfs;
|
|
1451
|
+
if (!vfs) throw new Error("VFS is not available in this environment");
|
|
1452
|
+
const dest0 = String(input.path ?? "").trim();
|
|
1453
|
+
if (!dest0) throw new Error("path is required");
|
|
1454
|
+
const url = input.url != null ? String(input.url) : void 0;
|
|
1455
|
+
const { data, contentType, filename } = await session.download(
|
|
1456
|
+
context.conversationId ?? "__default__",
|
|
1457
|
+
url
|
|
1458
|
+
);
|
|
1459
|
+
let dest = dest0.startsWith("/") ? dest0 : `/${dest0}`;
|
|
1460
|
+
if (dest.endsWith("/")) dest = `${dest}${filename}`;
|
|
1461
|
+
const slash = dest.lastIndexOf("/");
|
|
1462
|
+
if (slash > 0) {
|
|
1463
|
+
try {
|
|
1464
|
+
await vfs.mkdir(dest.slice(0, slash), { recursive: true });
|
|
1465
|
+
} catch {
|
|
1466
|
+
}
|
|
1467
|
+
}
|
|
1468
|
+
await vfs.writeFile(dest, new Uint8Array(data), contentType || void 0);
|
|
1469
|
+
return { path: dest, bytes: data.length, ...contentType ? { contentType } : {} };
|
|
1470
|
+
}
|
|
1471
|
+
},
|
|
1264
1472
|
{
|
|
1265
1473
|
name: "browser_screenshot",
|
|
1266
1474
|
description: "Take a screenshot of the current page. Returns the image so you can see exactly what the page looks like. Use this when you need to see visual layout, verify actions, or read content that isn't in the accessibility tree.",
|
package/package.json
CHANGED
package/src/session.ts
CHANGED
|
@@ -62,6 +62,64 @@ async function getBrowserManagerCtor(): Promise<new () => BrowserManagerInstance
|
|
|
62
62
|
|
|
63
63
|
const MAX_TABS = 8;
|
|
64
64
|
|
|
65
|
+
/**
 * Sites that hard-block datacenter IPs (a 403 / "blocked by network security"
 * before any fingerprint check). Navigating to one auto-enables residential
 * proxies. Matched on the domain suffix so subdomains (old.reddit.com,
 * www.linkedin.com) are covered. The agent can also force proxies on for
 * anything else via `browser_open`'s `proxy` param.
 */
const PROXY_DOMAINS = [
  "reddit.com",
  "linkedin.com",
  "instagram.com",
  "facebook.com",
  "x.com",
  "twitter.com",
  "tiktok.com",
  "quora.com",
  "pinterest.com",
];

/**
 * Whether a URL's host is (a subdomain of) a known IP-blocking domain.
 *
 * @param url Absolute URL to test.
 * @returns true when the host matches an entry of PROXY_DOMAINS; false
 *   otherwise, including when `url` cannot be parsed by `new URL`.
 */
function shouldProxyFor(url: string): boolean {
  let host: string;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    return false;
  }
  // URL.hostname keeps the trailing dot of a fully-qualified name
  // ("www.reddit.com."); strip it so the FQDN form hits the gate too.
  host = host.replace(/\.$/, "");
  return PROXY_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`));
}
|
|
91
|
+
|
|
92
|
+
/**
 * Create a Browserbase session with residential proxies enabled and return its
 * CDP `connectUrl`. Used instead of agent-browser's built-in browserbase path,
 * which hardcodes the create body to `{ projectId }` and so can't turn proxies
 * on. Reads the same `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` env vars
 * agent-browser does.
 *
 * @returns The `connectUrl` string from the create-session response.
 * @throws Error when either env var is missing, the API responds with a
 *   non-2xx status, or the response carries no usable `connectUrl`. A request
 *   that exceeds the timeout rejects with the fetch abort error.
 */
async function createBrowserbaseProxiedSession(): Promise<string> {
  const apiKey = process.env.BROWSERBASE_API_KEY;
  const projectId = process.env.BROWSERBASE_PROJECT_ID;
  if (!apiKey || !projectId) {
    throw new Error(
      "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID are required when using browserbase",
    );
  }
  const res = await fetch("https://api.browserbase.com/v1/sessions", {
    method: "POST",
    headers: { "Content-Type": "application/json", "X-BB-API-Key": apiKey },
    body: JSON.stringify({ projectId, proxies: true }),
    // Bound the request: a stalled API call would otherwise never settle and
    // keep whatever is awaiting the launch blocked indefinitely.
    signal: AbortSignal.timeout(60000),
  });
  if (!res.ok) {
    const detail = await res.text().catch(() => "");
    throw new Error(`Failed to create Browserbase session: ${res.status} ${detail}`);
  }
  const payload: unknown = await res.json();
  const connectUrl =
    typeof payload === "object" && payload !== null
      ? (payload as { connectUrl?: unknown }).connectUrl
      : undefined;
  // Reject non-string values too: the result is used verbatim as a CDP URL.
  if (typeof connectUrl !== "string" || !connectUrl) {
    throw new Error("Browserbase session response missing connectUrl");
  }
  return connectUrl;
}
|
|
122
|
+
|
|
65
123
|
const VALID_SAME_SITE = ["Strict", "Lax", "None"];
|
|
66
124
|
|
|
67
125
|
/**
|
|
@@ -146,6 +204,29 @@ interface ConversationTab {
|
|
|
146
204
|
lastUsed: number;
|
|
147
205
|
}
|
|
148
206
|
|
|
207
|
+
/**
 * Strip path separators / nulls so a derived name can't escape its folder.
 * Trims the name, replaces `/` and `\` with `_`, and removes NUL characters.
 *
 * @param name Candidate filename.
 * @returns The cleaned name, or "download" when nothing usable remains.
 */
function sanitizeName(name: string): string {
  const cleaned = name.trim().replace(/[/\\]/g, "_").replace(/\0/g, "");
  // "." and ".." contain no separators but still resolve to the folder itself
  // or its parent once appended to a directory path.
  if (cleaned === "" || cleaned === "." || cleaned === "..") return "download";
  return cleaned;
}

/**
 * Derive a filename from a Content-Disposition header, falling back to the
 * URL's last path segment, then a generic "download".
 *
 * Order of preference: `filename*=` (extended, percent-encoded), a
 * double-quoted `filename="..."`, an unquoted `filename=...`, the URL segment.
 *
 * @param disposition Raw Content-Disposition header value ("" when absent).
 * @param url URL the file was fetched from.
 * @returns A sanitized, non-empty filename.
 */
function filenameFromDownload(disposition: string, url: string): string {
  // Percent-decode, keeping the raw text when the escapes are malformed.
  const decode = (s: string): string => {
    try {
      return decodeURIComponent(s);
    } catch {
      return s;
    }
  };
  // Extended form is charset'language'value; skip the whole prefix so a
  // language tag or a non-UTF-8 charset is not mistaken for the name.
  const star = /filename\*\s*=\s*(?:[^'";=\s]*'[^'";]*')?["']?([^"';]+)/i.exec(disposition);
  if (star?.[1]) return sanitizeName(decode(star[1]));
  // A double-quoted value may legitimately contain ';' or '\''.
  const quoted = /filename\s*=\s*"([^"]+)"/i.exec(disposition);
  if (quoted?.[1]) return sanitizeName(quoted[1]);
  const plain = /filename\s*=\s*["']?([^"';]+)/i.exec(disposition);
  if (plain?.[1]) return sanitizeName(plain[1]);
  try {
    const base = new URL(url).pathname.split("/").filter(Boolean).pop();
    if (base) return sanitizeName(decode(base));
  } catch { /* not a parseable URL */ }
  return "download";
}
|
|
229
|
+
|
|
149
230
|
export class BrowserSession {
|
|
150
231
|
private readonly config: BrowserConfig;
|
|
151
232
|
private readonly sessionId: string;
|
|
@@ -177,6 +258,15 @@ export class BrowserSession {
|
|
|
177
258
|
// Currently screencast conversation (only one at a time due to CDP)
|
|
178
259
|
private _screencastConversation: string | undefined;
|
|
179
260
|
|
|
261
|
+
// Residential-proxy mode. `launchedProxyMode` is what the currently-launched
|
|
262
|
+
// remote session was created with; `proxyEnabled` is the mode the NEXT launch
|
|
263
|
+
// should use. They diverge when a navigation asks for a different mode (a
|
|
264
|
+
// hard-domain gate hit, or an explicit `proxy` on browser_open) — proxies are
|
|
265
|
+
// fixed at Browserbase-session creation, so switching means recreating the
|
|
266
|
+
// session (see ensureProxyMode). Only meaningful for the browserbase provider.
|
|
267
|
+
private launchedProxyMode = false;
|
|
268
|
+
private proxyEnabled = false;
|
|
269
|
+
|
|
180
270
|
constructor(sessionId: string, config: BrowserConfig = {}) {
|
|
181
271
|
this.sessionId = sessionId;
|
|
182
272
|
this.config = config;
|
|
@@ -356,6 +446,16 @@ export class BrowserSession {
|
|
|
356
446
|
if (this.config.cdpUrl) {
|
|
357
447
|
launchOpts.cdpUrl = this.config.cdpUrl;
|
|
358
448
|
console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
|
|
449
|
+
} else if (this.config.provider === "browserbase" && this.proxyEnabled) {
|
|
450
|
+
// agent-browser's browserbase path creates the session with only
|
|
451
|
+
// { projectId } — no proxy option — so it always lands on a datacenter IP
|
|
452
|
+
// that sites like Reddit block with a 403 before any fingerprint check.
|
|
453
|
+
// Create the session ourselves with residential proxies enabled and hand
|
|
454
|
+
// agent-browser the connectUrl via its cdpUrl path, which bypasses
|
|
455
|
+
// connectToBrowserbase. Stealth, cookie restore, and the screencast are
|
|
456
|
+
// applied below on the connected context, unchanged.
|
|
457
|
+
launchOpts.cdpUrl = await createBrowserbaseProxiedSession();
|
|
458
|
+
console.log("[poncho][browser] Using cloud provider: browserbase (residential proxies)");
|
|
359
459
|
} else if (this.config.provider) {
|
|
360
460
|
launchOpts.provider = this.config.provider;
|
|
361
461
|
console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
|
|
@@ -391,6 +491,9 @@ export class BrowserSession {
|
|
|
391
491
|
}
|
|
392
492
|
|
|
393
493
|
await mgr.launch(launchOpts as Parameters<BrowserManagerInstance["launch"]>[0]);
|
|
494
|
+
// Record the proxy mode this session was actually launched with, so
|
|
495
|
+
// ensureProxyMode knows whether a later navigation needs a recreate.
|
|
496
|
+
this.launchedProxyMode = this.config.provider === "browserbase" && this.proxyEnabled;
|
|
394
497
|
|
|
395
498
|
// Remote browsers (cloud provider / cdpUrl) ignore launchOpts.viewport —
|
|
396
499
|
// that's only applied when launching a local context — so the page renders
|
|
@@ -542,9 +645,14 @@ export class BrowserSession {
|
|
|
542
645
|
// Browser operations (all scoped by conversationId)
|
|
543
646
|
// -----------------------------------------------------------------------
|
|
544
647
|
|
|
545
|
-
async open(
|
|
648
|
+
async open(
|
|
649
|
+
conversationId: string,
|
|
650
|
+
url: string,
|
|
651
|
+
opts?: { proxy?: boolean },
|
|
652
|
+
): Promise<{ title?: string }> {
|
|
546
653
|
await this.lock();
|
|
547
654
|
try {
|
|
655
|
+
await this.ensureProxyMode(url, opts?.proxy);
|
|
548
656
|
return await this._doOpen(conversationId, url);
|
|
549
657
|
} catch (err: unknown) {
|
|
550
658
|
const msg = (err as Error)?.message ?? "";
|
|
@@ -565,6 +673,33 @@ export class BrowserSession {
|
|
|
565
673
|
}
|
|
566
674
|
}
|
|
567
675
|
|
|
676
|
+
/**
|
|
677
|
+
* Reconcile the residential-proxy mode before a navigation. The desired mode
|
|
678
|
+
* is: an explicit `requested` (the agent's `browser_open` proxy param) OR the
|
|
679
|
+
* URL hitting a known IP-blocking domain OR the config default. Since proxies
|
|
680
|
+
* are fixed at Browserbase-session creation, a change tears the live session
|
|
681
|
+
* down so the next ensureManager relaunches proxied. Cookies/localStorage are
|
|
682
|
+
* persisted first and restored on relaunch, so login state survives; open
|
|
683
|
+
* tabs in other conversations of the same session are lost (rare, and only
|
|
684
|
+
* when the mode actually flips). No-op unless the provider is browserbase.
|
|
685
|
+
*/
|
|
686
|
+
private async ensureProxyMode(url: string, requested?: boolean): Promise<void> {
|
|
687
|
+
if (this.config.provider !== "browserbase") return;
|
|
688
|
+
const want =
|
|
689
|
+
requested === true || shouldProxyFor(url) || (this.config.proxies ?? false);
|
|
690
|
+
this.proxyEnabled = want;
|
|
691
|
+
if (!this.manager || want === this.launchedProxyMode) return;
|
|
692
|
+
console.log(`[poncho][browser] Switching proxy mode -> ${want}; recreating session`);
|
|
693
|
+
try { await this.persistStorageState(); } catch { /* best-effort */ }
|
|
694
|
+
try { await this.manager.close(); } catch { /* */ }
|
|
695
|
+
this.manager = undefined;
|
|
696
|
+
this._contextStealthInstalled = false;
|
|
697
|
+
this._uaOverrideApplied.clear();
|
|
698
|
+
for (const [, t] of this.tabs) {
|
|
699
|
+
if (t.tabIndex >= 0) { t.tabIndex = -1; t.active = false; t.url = undefined; }
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
|
|
568
703
|
private async _doOpen(conversationId: string, url: string): Promise<{ title?: string }> {
|
|
569
704
|
const mgr = await this.ensureManager();
|
|
570
705
|
const tab = await this.switchToConversation(mgr, conversationId);
|
|
@@ -655,6 +790,66 @@ export class BrowserSession {
|
|
|
655
790
|
}
|
|
656
791
|
}
|
|
657
792
|
|
|
793
|
+
/**
|
|
794
|
+
* Fetch a file using the page's own (logged-in) session and return its
|
|
795
|
+
* bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
|
|
796
|
+
* current page. The fetch runs INSIDE the page via `evaluate`, so it carries
|
|
797
|
+
* the site's cookies and works the same whether the browser is local or a
|
|
798
|
+
* remote/cloud provider (the bytes come back over CDP). Because it's a page
|
|
799
|
+
* `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
|
|
800
|
+
* site doesn't allow CORS for will fail — navigate to the file first (so it's
|
|
801
|
+
* same-origin) or pass its direct URL while on that site.
|
|
802
|
+
*/
|
|
803
|
+
async download(
|
|
804
|
+
conversationId: string,
|
|
805
|
+
url?: string,
|
|
806
|
+
): Promise<{ data: Buffer; contentType: string; filename: string }> {
|
|
807
|
+
await this.lock();
|
|
808
|
+
try {
|
|
809
|
+
const mgr = await this.ensureManager();
|
|
810
|
+
await this.switchToConversation(mgr, conversationId);
|
|
811
|
+
const page = mgr.getPage();
|
|
812
|
+
const target = url && url.trim() ? url.trim() : page.url();
|
|
813
|
+
if (!target || target === "about:blank") {
|
|
814
|
+
throw new Error("no URL to download (open the file's page first, or pass a url)");
|
|
815
|
+
}
|
|
816
|
+
const MAX_BYTES = 25 * 1024 * 1024;
|
|
817
|
+
// Build the in-page fetch. JSON.stringify safely escapes the URL into the
|
|
818
|
+
// evaluated source. Base64 in-page so the bytes survive the JSON channel.
|
|
819
|
+
const expr = `(async () => {
|
|
820
|
+
const res = await fetch(${JSON.stringify(target)}, { credentials: "include" });
|
|
821
|
+
if (!res.ok) throw new Error("HTTP " + res.status + " " + res.statusText);
|
|
822
|
+
const buf = new Uint8Array(await res.arrayBuffer());
|
|
823
|
+
if (buf.length > ${MAX_BYTES}) throw new Error("file too large: " + buf.length + " bytes (max ${MAX_BYTES})");
|
|
824
|
+
let bin = "";
|
|
825
|
+
const CH = 0x8000;
|
|
826
|
+
for (let i = 0; i < buf.length; i += CH) {
|
|
827
|
+
bin += String.fromCharCode.apply(null, buf.subarray(i, i + CH));
|
|
828
|
+
}
|
|
829
|
+
return {
|
|
830
|
+
base64: btoa(bin),
|
|
831
|
+
contentType: res.headers.get("content-type") || "",
|
|
832
|
+
disposition: res.headers.get("content-disposition") || "",
|
|
833
|
+
finalUrl: res.url || ${JSON.stringify(target)},
|
|
834
|
+
};
|
|
835
|
+
})()`;
|
|
836
|
+
const r = (await page.evaluate(expr)) as {
|
|
837
|
+
base64: string;
|
|
838
|
+
contentType: string;
|
|
839
|
+
disposition: string;
|
|
840
|
+
finalUrl: string;
|
|
841
|
+
};
|
|
842
|
+
const data = Buffer.from(r.base64, "base64");
|
|
843
|
+
return {
|
|
844
|
+
data,
|
|
845
|
+
contentType: r.contentType,
|
|
846
|
+
filename: filenameFromDownload(r.disposition, r.finalUrl),
|
|
847
|
+
};
|
|
848
|
+
} finally {
|
|
849
|
+
this.unlock();
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
|
|
658
853
|
async scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void> {
|
|
659
854
|
await this.lock();
|
|
660
855
|
try {
|
package/src/tools.ts
CHANGED
|
@@ -14,6 +14,7 @@ export function createBrowserTools(
|
|
|
14
14
|
"This is a HEAVY, last-resort tool — prefer cheaper options first: use `web_fetch` to read page content, and use a dedicated API or MCP integration when one exists for the service. " +
|
|
15
15
|
"Only reach for the browser when those can't do the job: a page `web_fetch` can't render (JS-heavy/SPA), or a task that requires operating a site or web app that has no API and no MCP integration (e.g. logging in and clicking through a UI). " +
|
|
16
16
|
"When a site needs credentials, navigate to its login page and let the user sign in directly in the live view — never ask for passwords in chat. " +
|
|
17
|
+
"If a page returns a 403 / 'blocked by network security' / 'access denied' (common on Reddit, LinkedIn, Instagram, and similar), retry with `proxy: true` to route through a residential IP. " +
|
|
17
18
|
"To open files from the virtual filesystem, use /api/vfs/{path} (e.g. /api/vfs/downloads/report.pdf).",
|
|
18
19
|
inputSchema: {
|
|
19
20
|
type: "object",
|
|
@@ -22,6 +23,11 @@ export function createBrowserTools(
|
|
|
22
23
|
type: "string",
|
|
23
24
|
description: "The URL to navigate to (must include protocol, e.g. https://)",
|
|
24
25
|
},
|
|
26
|
+
proxy: {
|
|
27
|
+
type: "boolean",
|
|
28
|
+
description:
|
|
29
|
+
"Route this session through a residential proxy instead of a datacenter IP. Known IP-blocking sites (Reddit, LinkedIn, …) use this automatically; set it explicitly to retry a site that blocked you with a 403/'blocked' page. Slower to start (the session is recreated) and costs proxy bandwidth, so only use it when a normal open was blocked.",
|
|
30
|
+
},
|
|
25
31
|
},
|
|
26
32
|
required: ["url"],
|
|
27
33
|
},
|
|
@@ -30,7 +36,8 @@ export function createBrowserTools(
|
|
|
30
36
|
const cid = context.conversationId ?? "__default__";
|
|
31
37
|
const url = String(input.url ?? "");
|
|
32
38
|
if (!url) throw new Error("url is required");
|
|
33
|
-
const
|
|
39
|
+
const proxy = input.proxy === true;
|
|
40
|
+
const result = await session.open(cid, url, { proxy });
|
|
34
41
|
session.startScreencast(cid).catch((err) => {
|
|
35
42
|
console.error("[poncho][browser] startScreencast failed:", err?.message ?? err);
|
|
36
43
|
});
|
|
@@ -176,6 +183,55 @@ export function createBrowserTools(
|
|
|
176
183
|
return { url: result.url, title: result.title, text: result.text };
|
|
177
184
|
},
|
|
178
185
|
},
|
|
186
|
+
{
|
|
187
|
+
name: "browser_download",
|
|
188
|
+
description:
|
|
189
|
+
"Download a file from the browser and save it into the user's virtual filesystem (VFS). " +
|
|
190
|
+
"Fetches the file using the browser's logged-in session, so it works for files behind a login — " +
|
|
191
|
+
"use it to keep a PDF, CSV, image, or other file the page offers. " +
|
|
192
|
+
"It fetches `url` (or the current page if you omit it), so for a download link on the page, grab its href from a snapshot first; " +
|
|
193
|
+
"for a file that opens in the browser, navigate to it and call this with no url. " +
|
|
194
|
+
"The fetch runs in the page, so the url should be same-origin with the current page (navigate to the file's site first if needed). " +
|
|
195
|
+
"Returns the saved VFS path and byte size — the bytes go straight to the VFS, not through the chat.",
|
|
196
|
+
inputSchema: {
|
|
197
|
+
type: "object",
|
|
198
|
+
properties: {
|
|
199
|
+
path: {
|
|
200
|
+
type: "string",
|
|
201
|
+
description:
|
|
202
|
+
"Destination in the VFS. Include a filename (e.g. /downloads/report.pdf); parent folders are created as needed. " +
|
|
203
|
+
"End with '/' (e.g. /downloads/) to keep the file's own name.",
|
|
204
|
+
},
|
|
205
|
+
url: {
|
|
206
|
+
type: "string",
|
|
207
|
+
description:
|
|
208
|
+
"URL of the file to download. Optional — defaults to the current page's URL.",
|
|
209
|
+
},
|
|
210
|
+
},
|
|
211
|
+
required: ["path"],
|
|
212
|
+
},
|
|
213
|
+
handler: async (input: BrowserToolInput, context: ToolContext) => {
|
|
214
|
+
const session = getSession();
|
|
215
|
+
const vfs = context.vfs;
|
|
216
|
+
if (!vfs) throw new Error("VFS is not available in this environment");
|
|
217
|
+
const dest0 = String(input.path ?? "").trim();
|
|
218
|
+
if (!dest0) throw new Error("path is required");
|
|
219
|
+
const url = input.url != null ? String(input.url) : undefined;
|
|
220
|
+
const { data, contentType, filename } = await session.download(
|
|
221
|
+
context.conversationId ?? "__default__",
|
|
222
|
+
url,
|
|
223
|
+
);
|
|
224
|
+
// A trailing slash (or bare folder) means "use the file's own name".
|
|
225
|
+
let dest = dest0.startsWith("/") ? dest0 : `/${dest0}`;
|
|
226
|
+
if (dest.endsWith("/")) dest = `${dest}${filename}`;
|
|
227
|
+
const slash = dest.lastIndexOf("/");
|
|
228
|
+
if (slash > 0) {
|
|
229
|
+
try { await vfs.mkdir(dest.slice(0, slash), { recursive: true }); } catch { /* exists */ }
|
|
230
|
+
}
|
|
231
|
+
await vfs.writeFile(dest, new Uint8Array(data), contentType || undefined);
|
|
232
|
+
return { path: dest, bytes: data.length, ...(contentType ? { contentType } : {}) };
|
|
233
|
+
},
|
|
234
|
+
},
|
|
179
235
|
{
|
|
180
236
|
name: "browser_screenshot",
|
|
181
237
|
description:
|
package/src/types.ts
CHANGED
|
@@ -77,4 +77,10 @@ export interface BrowserConfig {
|
|
|
77
77
|
/** Connect to an existing browser via Chrome DevTools Protocol URL or port.
|
|
78
78
|
* Mutually exclusive with `provider`. */
|
|
79
79
|
cdpUrl?: string;
|
|
80
|
+
/** Default residential-proxy mode for Browserbase sessions. Datacenter IPs are
|
|
81
|
+
* hard-blocked (403) by IP-reputation walls (Reddit, LinkedIn, …) before any
|
|
82
|
+
* fingerprint check. Known such domains are always proxied automatically, and
|
|
83
|
+
* `browser_open` can force it per-navigation; set this `true` to proxy every
|
|
84
|
+
* session by default. Billed per GB by Browserbase. Browserbase provider only. */
|
|
85
|
+
proxies?: boolean;
|
|
80
86
|
}
|