@poncho-ai/browser 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/.turbo/turbo-test.log +12 -0
- package/CHANGELOG.md +16 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +169 -4
- package/package.json +1 -1
- package/src/session.ts +142 -4
- package/src/tools.ts +56 -0
- package/src/types.ts +7 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/browser@0.
|
|
2
|
+
> @poncho-ai/browser@0.6.0 build /Users/cesar/Dev/latitude/poncho-ai/packages/browser
|
|
3
3
|
> tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
[34mCLI[39m tsup v8.5.1
|
|
8
8
|
[34mCLI[39m Target: es2022
|
|
9
9
|
[34mESM[39m Build start
|
|
10
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
11
|
-
[32mESM[39m ⚡️ Build success in
|
|
10
|
+
[32mESM[39m [1mdist/index.js [22m[32m40.85 KB[39m
|
|
11
|
+
[32mESM[39m ⚡️ Build success in 76ms
|
|
12
12
|
[34mDTS[39m Build start
|
|
13
|
-
[32mDTS[39m ⚡️ Build success in
|
|
14
|
-
[32mDTS[39m [1mdist/index.d.ts [22m[
|
|
13
|
+
[32mDTS[39m ⚡️ Build success in 1346ms
|
|
14
|
+
[32mDTS[39m [1mdist/index.d.ts [22m[32m13.55 KB[39m
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
|
|
2
|
+
> @poncho-ai/browser@0.3.0 test /Users/cesar/Dev/latitude/poncho-ai/packages/browser
|
|
3
|
+
> vitest --passWithNoTests
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
[7m[1m[36m RUN [39m[22m[27m [36mv1.6.1[39m [90m/Users/cesar/Dev/latitude/poncho-ai/packages/browser[39m
|
|
7
|
+
|
|
8
|
+
[2minclude: [22m[33m**/*.{test,spec}.?(c|m)[jt]s?(x)[39m
|
|
9
|
+
[2mexclude: [22m[33m**/node_modules/**[2m, [22m**/dist/**[2m, [22m**/cypress/**[2m, [22m**/.{idea,git,cache,output,temp}/**[2m, [22m**/{karma,rollup,webpack,vite,vitest,jest,ava,babel,nyc,cypress,tsup,build,eslint,prettier}.config.*[39m
|
|
10
|
+
[2mwatch exclude: [22m[33m**/node_modules/**[2m, [22m**/dist/**[39m
|
|
11
|
+
No test files found, exiting with code 0
|
|
12
|
+
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @poncho-ai/browser
|
|
2
2
|
|
|
3
|
+
## 0.6.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- [`76294e9`](https://github.com/cesr/poncho-ai/commit/76294e95035bf3abbb19c28871a33f82351c49ec) Thanks [@cesr](https://github.com/cesr)! - Support remote and serverless browser deployments.
|
|
8
|
+
|
|
9
|
+
**@poncho-ai/browser**: Add `provider` and `cdpUrl` config options for cloud browser services (Browserbase, Browser Use, Kernel) and direct CDP connections. Auto-detect `@sparticuz/chromium` on serverless platforms (Vercel, Lambda) and default the profile directory to `/tmp`.
|
|
10
|
+
|
|
11
|
+
**@poncho-ai/cli**: Generate @vercel/nft trace hints for `@poncho-ai/browser` and `@sparticuz/chromium` in the Vercel entry point so dynamically-loaded browser packages are bundled into the serverless function.
|
|
12
|
+
|
|
13
|
+
## 0.5.0
|
|
14
|
+
|
|
15
|
+
### Minor Changes
|
|
16
|
+
|
|
17
|
+
- [`540c8e6`](https://github.com/cesr/poncho-ai/commit/540c8e6d895a95c2f215deb4af219069543371d9) Thanks [@cesr](https://github.com/cesr)! - Add `browser_click_text` and `browser_execute_js` tools for interacting with elements that don't appear in the accessibility snapshot (e.g. styled divs acting as buttons). Also force new-tab navigations (`window.open`, `target="_blank"`) to stay in the current tab so agents don't lose context.
|
|
18
|
+
|
|
3
19
|
## 0.4.0
|
|
4
20
|
|
|
5
21
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -64,6 +64,13 @@ interface BrowserConfig {
|
|
|
64
64
|
* user-agent, and passes anti-automation Chrome flags. */
|
|
65
65
|
stealth?: boolean;
|
|
66
66
|
storagePersistence?: BrowserStoragePersistence;
|
|
67
|
+
/** Cloud browser provider. Requires the provider's API key env var to be set
|
|
68
|
+
* (e.g. `BROWSERBASE_API_KEY` + `BROWSERBASE_PROJECT_ID` for Browserbase).
|
|
69
|
+
* When set, the browser runs remotely instead of launching a local Chromium. */
|
|
70
|
+
provider?: "browserbase" | "browseruse" | "kernel";
|
|
71
|
+
/** Connect to an existing browser via Chrome DevTools Protocol URL or port.
|
|
72
|
+
* Mutually exclusive with `provider`. */
|
|
73
|
+
cdpUrl?: string;
|
|
67
74
|
}
|
|
68
75
|
|
|
69
76
|
type FrameListener = (frame: BrowserFrame) => void;
|
|
@@ -90,11 +97,24 @@ declare class BrowserSession {
|
|
|
90
97
|
* Only needs to be called once per browser launch.
|
|
91
98
|
*/
|
|
92
99
|
private installContextStealth;
|
|
100
|
+
/**
|
|
101
|
+
* Force all new-tab navigations (window.open, target="_blank") to open
|
|
102
|
+
* in the current tab instead. Agents operate on a single tab at a time
|
|
103
|
+
* and can't see or interact with popups.
|
|
104
|
+
*/
|
|
105
|
+
private installSameTabScript;
|
|
93
106
|
/**
|
|
94
107
|
* Override the user-agent via CDP on the current page target.
|
|
95
108
|
* CDP Network.setUserAgentOverride is per-target, so call per-tab.
|
|
96
109
|
*/
|
|
97
110
|
private overrideUserAgentOnPage;
|
|
111
|
+
private get isRemote();
|
|
112
|
+
private get isServerless();
|
|
113
|
+
/**
|
|
114
|
+
* Resolve executablePath for local launches. When no explicit path is set
|
|
115
|
+
* and we're on a serverless platform, try `@sparticuz/chromium` automatically.
|
|
116
|
+
*/
|
|
117
|
+
private resolveExecutablePath;
|
|
98
118
|
private launchFreshManager;
|
|
99
119
|
private ensureManager;
|
|
100
120
|
private evictOldestTab;
|
|
@@ -122,6 +142,8 @@ declare class BrowserSession {
|
|
|
122
142
|
title: string;
|
|
123
143
|
}>;
|
|
124
144
|
scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void>;
|
|
145
|
+
clickText(conversationId: string, text: string, exact?: boolean): Promise<void>;
|
|
146
|
+
executeJs(conversationId: string, script: string): Promise<unknown>;
|
|
125
147
|
closeTab(conversationId: string): Promise<void>;
|
|
126
148
|
navigate(conversationId: string, action: string): Promise<void>;
|
|
127
149
|
startScreencast(conversationId: string, options?: ScreencastOptions): Promise<void>;
|
package/dist/index.js
CHANGED
|
@@ -186,6 +186,46 @@ async function getBrowserManagerCtor() {
|
|
|
186
186
|
return BrowserManagerCtor;
|
|
187
187
|
}
|
|
188
188
|
var MAX_TABS = 8;
|
|
189
|
+
/**
 * Init script source that keeps new-tab navigations in the current tab.
 *
 * It does two things inside the page:
 *  - replaces `window.open` so a call with a URL navigates the current page
 *    (calls without a URL still go to the original implementation);
 *  - rewrites `target="_blank"` to `_self` on `<a>`, `<area>`, `<form>` and
 *    `<base>` elements, both when they are inserted and when their `target`
 *    attribute is changed later.
 *
 * Each half is wrapped in its own try/catch so a failure in one does not
 * disable the other or break the host page.
 */
var SAME_TAB_INIT_SCRIPT = `
(() => {
  // Override window.open to navigate in-place
  try {
    const origOpen = window.open;
    window.open = function(url, target, features) {
      if (url) {
        location.href = url;
        return window;
      }
      return origOpen.call(this, url, target, features);
    };
  } catch {}

  // Rewrite target="_blank" on existing and future links, areas, forms and <base>
  try {
    const SELECTOR = 'a[target="_blank"], area[target="_blank"], form[target="_blank"], base[target="_blank"]';
    const TAGS = ['A', 'AREA', 'FORM', 'BASE'];
    const rewrite = (el) => {
      if (!el || el.nodeType !== 1) return;
      if (!TAGS.includes(String(el.tagName).toUpperCase())) return;
      // Read the attribute rather than the property: form.target can be
      // shadowed by a form control named "target".
      const target = el.getAttribute('target');
      if (target && target.toLowerCase() === '_blank') {
        el.setAttribute('target', '_self');
      }
    };
    const rewriteAll = (root) => {
      if (root.querySelectorAll) {
        root.querySelectorAll(SELECTOR).forEach(rewrite);
      }
    };
    // Catch elements already in the DOM
    document.addEventListener('DOMContentLoaded', () => {
      rewriteAll(document);
    });
    // Catch dynamically added elements and later changes to the target attribute
    new MutationObserver((mutations) => {
      for (const m of mutations) {
        if (m.type === 'attributes') {
          rewrite(m.target);
          continue;
        }
        for (const node of m.addedNodes) {
          if (node.nodeType !== 1) continue;
          rewrite(node);
          rewriteAll(node);
        }
      }
    }).observe(document.documentElement, {
      childList: true,
      subtree: true,
      attributes: true,
      attributeFilter: ['target'],
    });
  } catch {}
})();
`;
|
|
189
229
|
var BrowserSession = class {
|
|
190
230
|
config;
|
|
191
231
|
sessionId;
|
|
@@ -264,6 +304,20 @@ var BrowserSession = class {
|
|
|
264
304
|
console.warn("[poncho][browser] Failed to install stealth init script:", err?.message ?? err);
|
|
265
305
|
}
|
|
266
306
|
}
|
|
307
|
+
/**
|
|
308
|
+
* Force all new-tab navigations (window.open, target="_blank") to open
|
|
309
|
+
* in the current tab instead. Agents operate on a single tab at a time
|
|
310
|
+
* and can't see or interact with popups.
|
|
311
|
+
*/
|
|
312
|
+
async installSameTabScript(mgr) {
|
|
313
|
+
const ctx = mgr.getContext();
|
|
314
|
+
if (!ctx) return;
|
|
315
|
+
try {
|
|
316
|
+
await ctx.addInitScript({ content: SAME_TAB_INIT_SCRIPT });
|
|
317
|
+
} catch (err) {
|
|
318
|
+
console.warn("[poncho][browser] Failed to install same-tab init script:", err?.message ?? err);
|
|
319
|
+
}
|
|
320
|
+
}
|
|
267
321
|
/**
|
|
268
322
|
* Override the user-agent via CDP on the current page target.
|
|
269
323
|
* CDP Network.setUserAgentOverride is per-target, so call per-tab.
|
|
@@ -284,22 +338,61 @@ var BrowserSession = class {
|
|
|
284
338
|
console.warn("[poncho][browser] Failed to override UA via CDP:", err?.message ?? err);
|
|
285
339
|
}
|
|
286
340
|
}
|
|
341
|
+
get isRemote() {
|
|
342
|
+
return !!(this.config.provider || this.config.cdpUrl);
|
|
343
|
+
}
|
|
344
|
+
get isServerless() {
|
|
345
|
+
return !!(process.env.VERCEL || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.AWS_EXECUTION_ENV || process.env.SERVERLESS);
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* Resolve executablePath for local launches. When no explicit path is set
|
|
349
|
+
* and we're on a serverless platform, try `@sparticuz/chromium` automatically.
|
|
350
|
+
*/
|
|
351
|
+
async resolveExecutablePath() {
|
|
352
|
+
if (this.config.executablePath) return this.config.executablePath;
|
|
353
|
+
if (!this.isServerless) return void 0;
|
|
354
|
+
try {
|
|
355
|
+
const spec = ["@sparticuz", "chromium"].join("/");
|
|
356
|
+
const mod = await import(
|
|
357
|
+
/* webpackIgnore: true */
|
|
358
|
+
spec
|
|
359
|
+
);
|
|
360
|
+
const chromium = mod.default ?? mod;
|
|
361
|
+
const path = await chromium.executablePath();
|
|
362
|
+
console.log(`[poncho][browser] Auto-detected @sparticuz/chromium: ${path}`);
|
|
363
|
+
return path;
|
|
364
|
+
} catch {
|
|
365
|
+
return void 0;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
287
368
|
async launchFreshManager() {
|
|
288
369
|
const Ctor = await getBrowserManagerCtor();
|
|
289
370
|
const mgr = new Ctor();
|
|
290
371
|
const viewport = this.config.viewport ?? { width: 1280, height: 720 };
|
|
291
|
-
await
|
|
372
|
+
const executablePath = await this.resolveExecutablePath();
|
|
292
373
|
const launchOpts = {
|
|
293
374
|
action: "launch",
|
|
294
375
|
headless: this.config.headless ?? true,
|
|
295
376
|
viewport: { width: viewport.width ?? 1280, height: viewport.height ?? 720 },
|
|
296
|
-
executablePath
|
|
297
|
-
profile: this.profileDir
|
|
377
|
+
executablePath
|
|
298
378
|
};
|
|
379
|
+
if (this.config.cdpUrl) {
|
|
380
|
+
launchOpts.cdpUrl = this.config.cdpUrl;
|
|
381
|
+
console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
|
|
382
|
+
} else if (this.config.provider) {
|
|
383
|
+
launchOpts.provider = this.config.provider;
|
|
384
|
+
console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
|
|
385
|
+
} else {
|
|
386
|
+
const profileDir = this.isServerless && !this.config.profileDir ? join(tmpdir(), "poncho-browser", this.sessionId) : this.profileDir;
|
|
387
|
+
await mkdir(profileDir, { recursive: true });
|
|
388
|
+
launchOpts.profile = profileDir;
|
|
389
|
+
}
|
|
299
390
|
if (this.stealthEnabled) {
|
|
300
391
|
const ua = this.stealthUserAgent;
|
|
301
392
|
launchOpts.userAgent = ua;
|
|
302
|
-
|
|
393
|
+
if (!this.isRemote) {
|
|
394
|
+
launchOpts.args = buildStealthArgs(ua);
|
|
395
|
+
}
|
|
303
396
|
console.log("[poncho][browser] Launching with stealth mode enabled (UA: " + ua + ")");
|
|
304
397
|
} else if (this.config.userAgent) {
|
|
305
398
|
launchOpts.userAgent = this.config.userAgent;
|
|
@@ -310,6 +403,7 @@ var BrowserSession = class {
|
|
|
310
403
|
if (this.stealthEnabled) {
|
|
311
404
|
await this.installContextStealth(mgr);
|
|
312
405
|
}
|
|
406
|
+
await this.installSameTabScript(mgr);
|
|
313
407
|
try {
|
|
314
408
|
const cdp = await mgr.getCDPSession();
|
|
315
409
|
await cdp.send("Debugger.disable");
|
|
@@ -558,6 +652,30 @@ var BrowserSession = class {
|
|
|
558
652
|
this.unlock();
|
|
559
653
|
}
|
|
560
654
|
}
|
|
655
|
+
async clickText(conversationId, text, exact) {
|
|
656
|
+
await this.lock();
|
|
657
|
+
try {
|
|
658
|
+
const mgr = await this.ensureManager();
|
|
659
|
+
const tab = await this.switchToConversation(mgr, conversationId);
|
|
660
|
+
const selector = exact ? `text="${text}"` : `text=${text}`;
|
|
661
|
+
const locator = mgr.getLocator(selector);
|
|
662
|
+
await locator.click();
|
|
663
|
+
tab.url = mgr.getPage().url();
|
|
664
|
+
} finally {
|
|
665
|
+
this.unlock();
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
async executeJs(conversationId, script) {
|
|
669
|
+
await this.lock();
|
|
670
|
+
try {
|
|
671
|
+
const mgr = await this.ensureManager();
|
|
672
|
+
await this.switchToConversation(mgr, conversationId);
|
|
673
|
+
const page = mgr.getPage();
|
|
674
|
+
return await page.evaluate(script);
|
|
675
|
+
} finally {
|
|
676
|
+
this.unlock();
|
|
677
|
+
}
|
|
678
|
+
}
|
|
561
679
|
async closeTab(conversationId) {
|
|
562
680
|
await this.lock();
|
|
563
681
|
try {
|
|
@@ -907,6 +1025,53 @@ function createBrowserTools(getSession, getConversationId) {
|
|
|
907
1025
|
return { clicked: ref };
|
|
908
1026
|
}
|
|
909
1027
|
},
|
|
1028
|
+
{
|
|
1029
|
+
name: "browser_click_text",
|
|
1030
|
+
description: "Click the first visible element on the page that contains the given text. Use this when an element doesn't appear in the snapshot \u2014 e.g. styled divs acting as buttons. By default matches substring (case-insensitive); set exact=true for exact text match.",
|
|
1031
|
+
inputSchema: {
|
|
1032
|
+
type: "object",
|
|
1033
|
+
properties: {
|
|
1034
|
+
text: {
|
|
1035
|
+
type: "string",
|
|
1036
|
+
description: "The visible text of the element to click"
|
|
1037
|
+
},
|
|
1038
|
+
exact: {
|
|
1039
|
+
type: "boolean",
|
|
1040
|
+
description: "If true, match the exact full text (case-sensitive). Default: false (substring, case-insensitive)."
|
|
1041
|
+
}
|
|
1042
|
+
},
|
|
1043
|
+
required: ["text"]
|
|
1044
|
+
},
|
|
1045
|
+
handler: async (input) => {
|
|
1046
|
+
const session = getSession();
|
|
1047
|
+
const text = String(input.text ?? "");
|
|
1048
|
+
if (!text) throw new Error("text is required");
|
|
1049
|
+
const exact = input.exact === true;
|
|
1050
|
+
await session.clickText(getConversationId(), text, exact);
|
|
1051
|
+
return { clicked: text, exact };
|
|
1052
|
+
}
|
|
1053
|
+
},
|
|
1054
|
+
{
|
|
1055
|
+
name: "browser_execute_js",
|
|
1056
|
+
description: "Execute JavaScript in the current page context and return the result. Use this to inspect or interact with the DOM when snapshot refs aren't available \u2014 e.g. finding elements by text content, getting bounding boxes, or clicking elements by selector. The script is evaluated via page.evaluate(); return a value to get it back.",
|
|
1057
|
+
inputSchema: {
|
|
1058
|
+
type: "object",
|
|
1059
|
+
properties: {
|
|
1060
|
+
script: {
|
|
1061
|
+
type: "string",
|
|
1062
|
+
description: "JavaScript code to evaluate in the page. Use a return statement or expression to get a result back."
|
|
1063
|
+
}
|
|
1064
|
+
},
|
|
1065
|
+
required: ["script"]
|
|
1066
|
+
},
|
|
1067
|
+
handler: async (input) => {
|
|
1068
|
+
const session = getSession();
|
|
1069
|
+
const script = String(input.script ?? "");
|
|
1070
|
+
if (!script) throw new Error("script is required");
|
|
1071
|
+
const result = await session.executeJs(getConversationId(), script);
|
|
1072
|
+
return { result: result ?? null };
|
|
1073
|
+
}
|
|
1074
|
+
},
|
|
910
1075
|
{
|
|
911
1076
|
name: "browser_type",
|
|
912
1077
|
description: "Type text into a form field identified by its ref from the last snapshot. This clears the field first, then types the new value.",
|
package/package.json
CHANGED
package/src/session.ts
CHANGED
|
@@ -62,6 +62,51 @@ async function getBrowserManagerCtor(): Promise<new () => BrowserManagerInstance
|
|
|
62
62
|
|
|
63
63
|
const MAX_TABS = 8;
|
|
64
64
|
|
|
65
|
+
/**
 * Init script source that keeps new-tab navigations in the current tab.
 *
 * It does two things inside the page:
 *  - replaces `window.open` so a call with a URL navigates the current page
 *    (calls without a URL still go to the original implementation);
 *  - rewrites `target="_blank"` to `_self` on `<a>`, `<area>`, `<form>` and
 *    `<base>` elements, both when they are inserted and when their `target`
 *    attribute is changed later.
 *
 * Each half is wrapped in its own try/catch so a failure in one does not
 * disable the other or break the host page.
 */
const SAME_TAB_INIT_SCRIPT = `
(() => {
  // Override window.open to navigate in-place
  try {
    const origOpen = window.open;
    window.open = function(url, target, features) {
      if (url) {
        location.href = url;
        return window;
      }
      return origOpen.call(this, url, target, features);
    };
  } catch {}

  // Rewrite target="_blank" on existing and future links, areas, forms and <base>
  try {
    const SELECTOR = 'a[target="_blank"], area[target="_blank"], form[target="_blank"], base[target="_blank"]';
    const TAGS = ['A', 'AREA', 'FORM', 'BASE'];
    const rewrite = (el) => {
      if (!el || el.nodeType !== 1) return;
      if (!TAGS.includes(String(el.tagName).toUpperCase())) return;
      // Read the attribute rather than the property: form.target can be
      // shadowed by a form control named "target".
      const target = el.getAttribute('target');
      if (target && target.toLowerCase() === '_blank') {
        el.setAttribute('target', '_self');
      }
    };
    const rewriteAll = (root) => {
      if (root.querySelectorAll) {
        root.querySelectorAll(SELECTOR).forEach(rewrite);
      }
    };
    // Catch elements already in the DOM
    document.addEventListener('DOMContentLoaded', () => {
      rewriteAll(document);
    });
    // Catch dynamically added elements and later changes to the target attribute
    new MutationObserver((mutations) => {
      for (const m of mutations) {
        if (m.type === 'attributes') {
          rewrite(m.target);
          continue;
        }
        for (const node of m.addedNodes) {
          if (node.nodeType !== 1) continue;
          rewrite(node);
          rewriteAll(node);
        }
      }
    }).observe(document.documentElement, {
      childList: true,
      subtree: true,
      attributes: true,
      attributeFilter: ['target'],
    });
  } catch {}
})();
`;
|
|
109
|
+
|
|
65
110
|
// Per-conversation tab state
|
|
66
111
|
interface ConversationTab {
|
|
67
112
|
tabIndex: number;
|
|
@@ -163,6 +208,21 @@ export class BrowserSession {
|
|
|
163
208
|
}
|
|
164
209
|
}
|
|
165
210
|
|
|
211
|
+
/**
 * Registers SAME_TAB_INIT_SCRIPT on the manager's browser context so that
 * new-tab navigations (window.open, target="_blank") stay in the current tab.
 * Agents work on one tab at a time and cannot see or interact with popups.
 *
 * Best-effort: returns silently when the manager has no context, and only
 * logs a warning when registering the script fails.
 */
private async installSameTabScript(mgr: BrowserManagerInstance): Promise<void> {
  const context = mgr.getContext();
  if (context) {
    try {
      await context.addInitScript({ content: SAME_TAB_INIT_SCRIPT });
    } catch (error) {
      const detail = (error as Error)?.message ?? error;
      console.warn("[poncho][browser] Failed to install same-tab init script:", detail);
    }
  }
}
|
|
225
|
+
|
|
166
226
|
/**
|
|
167
227
|
* Override the user-agent via CDP on the current page target.
|
|
168
228
|
* CDP Network.setUserAgentOverride is per-target, so call per-tab.
|
|
@@ -184,25 +244,74 @@ export class BrowserSession {
|
|
|
184
244
|
}
|
|
185
245
|
}
|
|
186
246
|
|
|
247
|
+
/** True when the config names a cloud `provider` or a `cdpUrl` to connect to. */
private get isRemote(): boolean {
  const { provider, cdpUrl } = this.config;
  return Boolean(provider || cdpUrl);
}
|
|
250
|
+
|
|
251
|
+
/**
 * True when any of the environment variables checked here is set to a
 * non-empty value: VERCEL, AWS_LAMBDA_FUNCTION_NAME, AWS_EXECUTION_ENV or
 * SERVERLESS.
 */
private get isServerless(): boolean {
  const markers = ["VERCEL", "AWS_LAMBDA_FUNCTION_NAME", "AWS_EXECUTION_ENV", "SERVERLESS"];
  return markers.some((name) => Boolean(process.env[name]));
}
|
|
259
|
+
|
|
260
|
+
/**
 * Resolve executablePath for local launches.
 *
 * Resolution order:
 *  1. an explicit `config.executablePath` always wins;
 *  2. remote sessions (cloud provider / CDP URL) and non-serverless
 *     environments get `undefined`;
 *  3. on a serverless platform, try to load `@sparticuz/chromium` and ask it
 *     for its executable path.
 *
 * @returns the path to use, or `undefined` when none could be resolved.
 *   Never throws: a missing optional package is ignored silently, any other
 *   failure is logged as a warning.
 */
private async resolveExecutablePath(): Promise<string | undefined> {
  if (this.config.executablePath) return this.config.executablePath;
  // Remote sessions never launch a local binary, so don't load (and have the
  // package prepare) a Chromium build that would go unused.
  if (this.isRemote || !this.isServerless) return undefined;
  try {
    // Dynamic import — @sparticuz/chromium is an optional peer dependency
    // that the user installs in their agent project for serverless runtimes.
    // The specifier is assembled at runtime — presumably so bundlers do not
    // try to resolve it statically.
    const spec = ["@sparticuz", "chromium"].join("/");
    const mod = await import(/* webpackIgnore: true */ spec);
    const chromium = mod.default ?? mod;
    const resolvedPath: unknown = await chromium.executablePath();
    if (typeof resolvedPath !== "string" || resolvedPath === "") {
      console.warn("[poncho][browser] @sparticuz/chromium did not return an executable path");
      return undefined;
    }
    console.log(`[poncho][browser] Auto-detected @sparticuz/chromium: ${resolvedPath}`);
    return resolvedPath;
  } catch (err) {
    // A missing package is the expected case (optional dependency); anything
    // else is a real failure worth surfacing.
    const code = (err as { code?: unknown } | null)?.code;
    if (code !== "ERR_MODULE_NOT_FOUND" && code !== "MODULE_NOT_FOUND") {
      console.warn(
        "[poncho][browser] Failed to resolve @sparticuz/chromium executable:",
        (err as Error)?.message ?? err,
      );
    }
    return undefined;
  }
}
|
|
280
|
+
|
|
187
281
|
private async launchFreshManager(): Promise<BrowserManagerInstance> {
|
|
188
282
|
const Ctor = await getBrowserManagerCtor();
|
|
189
283
|
const mgr = new Ctor();
|
|
190
284
|
|
|
191
285
|
const viewport = this.config.viewport ?? { width: 1280, height: 720 };
|
|
192
|
-
await
|
|
286
|
+
const executablePath = await this.resolveExecutablePath();
|
|
193
287
|
|
|
194
288
|
const launchOpts: Record<string, unknown> = {
|
|
195
289
|
action: "launch",
|
|
196
290
|
headless: this.config.headless ?? true,
|
|
197
291
|
viewport: { width: viewport.width ?? 1280, height: viewport.height ?? 720 },
|
|
198
|
-
executablePath
|
|
199
|
-
profile: this.profileDir,
|
|
292
|
+
executablePath,
|
|
200
293
|
};
|
|
201
294
|
|
|
295
|
+
if (this.config.cdpUrl) {
|
|
296
|
+
launchOpts.cdpUrl = this.config.cdpUrl;
|
|
297
|
+
console.log(`[poncho][browser] Connecting via CDP: ${this.config.cdpUrl}`);
|
|
298
|
+
} else if (this.config.provider) {
|
|
299
|
+
launchOpts.provider = this.config.provider;
|
|
300
|
+
console.log(`[poncho][browser] Using cloud provider: ${this.config.provider}`);
|
|
301
|
+
} else {
|
|
302
|
+
const profileDir = this.isServerless && !this.config.profileDir
|
|
303
|
+
? join(tmpdir(), "poncho-browser", this.sessionId)
|
|
304
|
+
: this.profileDir;
|
|
305
|
+
await mkdir(profileDir, { recursive: true });
|
|
306
|
+
launchOpts.profile = profileDir;
|
|
307
|
+
}
|
|
308
|
+
|
|
202
309
|
if (this.stealthEnabled) {
|
|
203
310
|
const ua = this.stealthUserAgent!;
|
|
204
311
|
launchOpts.userAgent = ua;
|
|
205
|
-
|
|
312
|
+
if (!this.isRemote) {
|
|
313
|
+
launchOpts.args = buildStealthArgs(ua);
|
|
314
|
+
}
|
|
206
315
|
console.log("[poncho][browser] Launching with stealth mode enabled (UA: " + ua + ")");
|
|
207
316
|
} else if (this.config.userAgent) {
|
|
208
317
|
launchOpts.userAgent = this.config.userAgent;
|
|
@@ -219,6 +328,9 @@ export class BrowserSession {
|
|
|
219
328
|
await this.installContextStealth(mgr);
|
|
220
329
|
}
|
|
221
330
|
|
|
331
|
+
// Redirect new-tab navigations into the current tab
|
|
332
|
+
await this.installSameTabScript(mgr);
|
|
333
|
+
|
|
222
334
|
try {
|
|
223
335
|
const cdp = await mgr.getCDPSession();
|
|
224
336
|
await cdp.send("Debugger.disable");
|
|
@@ -475,6 +587,32 @@ export class BrowserSession {
|
|
|
475
587
|
}
|
|
476
588
|
}
|
|
477
589
|
|
|
590
|
+
/**
 * Click an element located by its text in the tab of the given conversation.
 *
 * Builds a `text=` selector: quoted (`text="…"`) when `exact` is truthy,
 * unquoted (`text=…`) otherwise. After the click, the tab's recorded URL is
 * refreshed from the current page.
 *
 * NOTE(review): the unquoted form passes `text` through verbatim, so selector
 * syntax inside the text (e.g. `>>`) is still interpreted by the selector
 * engine — confirm whether that needs handling.
 *
 * @param conversationId conversation whose tab should receive the click
 * @param text text used to locate the element
 * @param exact when true, use the quoted (exact-match) selector form
 */
async clickText(conversationId: string, text: string, exact?: boolean): Promise<void> {
  await this.lock();
  try {
    const mgr = await this.ensureManager();
    const tab = await this.switchToConversation(mgr, conversationId);
    // A quoted selector body follows string-escaping rules, so backslashes and
    // double quotes in the text must be escaped or the selector is malformed.
    const selector = exact
      ? `text="${text.replace(/[\\"]/g, "\\$&")}"`
      : `text=${text}`;
    const locator = mgr.getLocator(selector);
    await locator.click();
    tab.url = mgr.getPage().url();
  } finally {
    this.unlock();
  }
}
|
|
603
|
+
|
|
604
|
+
/**
 * Evaluate `script` in the page of the given conversation's tab and return
 * whatever `page.evaluate` resolves to. The session lock is held for the
 * whole call and released even when evaluation throws.
 *
 * NOTE(review): the string is handed to `page.evaluate` unchanged; if that
 * evaluates strings as expressions, a bare top-level `return` would be a
 * syntax error — confirm against the tool description.
 */
async executeJs(conversationId: string, script: string): Promise<unknown> {
  await this.lock();
  try {
    const manager = await this.ensureManager();
    await this.switchToConversation(manager, conversationId);
    const result = await manager.getPage().evaluate(script);
    return result;
  } finally {
    this.unlock();
  }
}
|
|
615
|
+
|
|
478
616
|
async closeTab(conversationId: string): Promise<void> {
|
|
479
617
|
await this.lock();
|
|
480
618
|
try {
|
package/src/tools.ts
CHANGED
|
@@ -73,6 +73,62 @@ export function createBrowserTools(
|
|
|
73
73
|
return { clicked: ref };
|
|
74
74
|
},
|
|
75
75
|
},
|
|
76
|
+
{
|
|
77
|
+
name: "browser_click_text",
|
|
78
|
+
description:
|
|
79
|
+
"Click the first visible element on the page that contains the given text. " +
|
|
80
|
+
"Use this when an element doesn't appear in the snapshot — e.g. styled divs acting as buttons. " +
|
|
81
|
+
"By default matches substring (case-insensitive); set exact=true for exact text match.",
|
|
82
|
+
inputSchema: {
|
|
83
|
+
type: "object",
|
|
84
|
+
properties: {
|
|
85
|
+
text: {
|
|
86
|
+
type: "string",
|
|
87
|
+
description: "The visible text of the element to click",
|
|
88
|
+
},
|
|
89
|
+
exact: {
|
|
90
|
+
type: "boolean",
|
|
91
|
+
description:
|
|
92
|
+
"If true, match the exact full text (case-sensitive). Default: false (substring, case-insensitive).",
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
required: ["text"],
|
|
96
|
+
},
|
|
97
|
+
handler: async (input: BrowserToolInput) => {
|
|
98
|
+
const session = getSession();
|
|
99
|
+
const text = String(input.text ?? "");
|
|
100
|
+
if (!text) throw new Error("text is required");
|
|
101
|
+
const exact = input.exact === true;
|
|
102
|
+
await session.clickText(getConversationId(), text, exact);
|
|
103
|
+
return { clicked: text, exact };
|
|
104
|
+
},
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
name: "browser_execute_js",
|
|
108
|
+
description:
|
|
109
|
+
"Execute JavaScript in the current page context and return the result. " +
|
|
110
|
+
"Use this to inspect or interact with the DOM when snapshot refs aren't available — " +
|
|
111
|
+
"e.g. finding elements by text content, getting bounding boxes, or clicking elements by selector. " +
|
|
112
|
+
"The script is evaluated via page.evaluate(); return a value to get it back.",
|
|
113
|
+
inputSchema: {
|
|
114
|
+
type: "object",
|
|
115
|
+
properties: {
|
|
116
|
+
script: {
|
|
117
|
+
type: "string",
|
|
118
|
+
description:
|
|
119
|
+
"JavaScript code to evaluate in the page. Use a return statement or expression to get a result back.",
|
|
120
|
+
},
|
|
121
|
+
},
|
|
122
|
+
required: ["script"],
|
|
123
|
+
},
|
|
124
|
+
handler: async (input: BrowserToolInput) => {
|
|
125
|
+
const session = getSession();
|
|
126
|
+
const script = String(input.script ?? "");
|
|
127
|
+
if (!script) throw new Error("script is required");
|
|
128
|
+
const result = await session.executeJs(getConversationId(), script);
|
|
129
|
+
return { result: result ?? null };
|
|
130
|
+
},
|
|
131
|
+
},
|
|
76
132
|
{
|
|
77
133
|
name: "browser_type",
|
|
78
134
|
description:
|
package/src/types.ts
CHANGED
|
@@ -70,4 +70,11 @@ export interface BrowserConfig {
|
|
|
70
70
|
* user-agent, and passes anti-automation Chrome flags. */
|
|
71
71
|
stealth?: boolean;
|
|
72
72
|
storagePersistence?: BrowserStoragePersistence;
|
|
73
|
+
/** Cloud browser provider. Requires the provider's API key env var to be set
|
|
74
|
+
* (e.g. `BROWSERBASE_API_KEY` + `BROWSERBASE_PROJECT_ID` for Browserbase).
|
|
75
|
+
* When set, the browser runs remotely instead of launching a local Chromium. */
|
|
76
|
+
provider?: "browserbase" | "browseruse" | "kernel";
|
|
77
|
+
/** Connect to an existing browser via Chrome DevTools Protocol URL or port.
|
|
78
|
+
* Mutually exclusive with `provider`. */
|
|
79
|
+
cdpUrl?: string;
|
|
73
80
|
}
|