@poncho-ai/browser 0.4.0 → 0.5.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -3
- package/.turbo/turbo-test.log +12 -0
- package/CHANGELOG.md +6 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +126 -0
- package/package.json +1 -1
- package/src/session.ts +89 -0
- package/src/tools.ts +56 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @poncho-ai/browser@0.4.0 build /
|
|
2
|
+
> @poncho-ai/browser@0.4.0 build /Users/cesar/Dev/latitude/poncho-ai/packages/browser
|
|
3
3
|
> tsup src/index.ts --format esm --dts
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
[34mCLI[39m Target: es2022
|
|
9
9
|
[34mESM[39m Build start
|
|
10
10
|
[32mESM[39m [1mdist/index.js [22m[32m34.91 KB[39m
|
|
11
|
-
[32mESM[39m ⚡️ Build success in
|
|
11
|
+
[32mESM[39m ⚡️ Build success in 218ms
|
|
12
12
|
[34mDTS[39m Build start
|
|
13
|
-
[32mDTS[39m ⚡️ Build success in
|
|
13
|
+
[32mDTS[39m ⚡️ Build success in 5616ms
|
|
14
14
|
[32mDTS[39m [1mdist/index.d.ts [22m[32m12.44 KB[39m
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
|
|
2
|
+
> @poncho-ai/browser@0.3.0 test /Users/cesar/Dev/latitude/poncho-ai/packages/browser
|
|
3
|
+
> vitest --passWithNoTests
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
[7m[1m[36m RUN [39m[22m[27m [36mv1.6.1[39m [90m/Users/cesar/Dev/latitude/poncho-ai/packages/browser[39m
|
|
7
|
+
|
|
8
|
+
[2minclude: [22m[33m**/*.{test,spec}.?(c|m)[jt]s?(x)[39m
|
|
9
|
+
[2mexclude: [22m[33m**/node_modules/**[2m, [22m**/dist/**[2m, [22m**/cypress/**[2m, [22m**/.{idea,git,cache,output,temp}/**[2m, [22m**/{karma,rollup,webpack,vite,vitest,jest,ava,babel,nyc,cypress,tsup,build,eslint,prettier}.config.*[39m
|
|
10
|
+
[2mwatch exclude: [22m[33m**/node_modules/**[2m, [22m**/dist/**[39m
|
|
11
|
+
No test files found, exiting with code 0
|
|
12
|
+
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @poncho-ai/browser
|
|
2
2
|
|
|
3
|
+
## 0.5.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- [`540c8e6`](https://github.com/cesr/poncho-ai/commit/540c8e6d895a95c2f215deb4af219069543371d9) Thanks [@cesr](https://github.com/cesr)! - Add `browser_click_text` and `browser_execute_js` tools for interacting with elements that don't appear in the accessibility snapshot (e.g. styled divs acting as buttons). Also force new-tab navigations (`window.open`, `target="_blank"`) to stay in the current tab so agents don't lose context.
|
|
8
|
+
|
|
3
9
|
## 0.4.0
|
|
4
10
|
|
|
5
11
|
### Minor Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -90,6 +90,12 @@ declare class BrowserSession {
|
|
|
90
90
|
* Only needs to be called once per browser launch.
|
|
91
91
|
*/
|
|
92
92
|
private installContextStealth;
|
|
93
|
+
/**
|
|
94
|
+
* Force all new-tab navigations (window.open, target="_blank") to open
|
|
95
|
+
* in the current tab instead. Agents operate on a single tab at a time
|
|
96
|
+
* and can't see or interact with popups.
|
|
97
|
+
*/
|
|
98
|
+
private installSameTabScript;
|
|
93
99
|
/**
|
|
94
100
|
* Override the user-agent via CDP on the current page target.
|
|
95
101
|
* CDP Network.setUserAgentOverride is per-target, so call per-tab.
|
|
@@ -122,6 +128,8 @@ declare class BrowserSession {
|
|
|
122
128
|
title: string;
|
|
123
129
|
}>;
|
|
124
130
|
scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void>;
|
|
131
|
+
clickText(conversationId: string, text: string, exact?: boolean): Promise<void>;
|
|
132
|
+
executeJs(conversationId: string, script: string): Promise<unknown>;
|
|
125
133
|
closeTab(conversationId: string): Promise<void>;
|
|
126
134
|
navigate(conversationId: string, action: string): Promise<void>;
|
|
127
135
|
startScreencast(conversationId: string, options?: ScreencastOptions): Promise<void>;
|
package/dist/index.js
CHANGED
|
@@ -186,6 +186,46 @@ async function getBrowserManagerCtor() {
|
|
|
186
186
|
return BrowserManagerCtor;
|
|
187
187
|
}
|
|
188
188
|
var MAX_TABS = 8;
|
|
189
|
+
var SAME_TAB_INIT_SCRIPT = `
|
|
190
|
+
(() => {
|
|
191
|
+
// Override window.open to navigate in-place
|
|
192
|
+
try {
|
|
193
|
+
const origOpen = window.open;
|
|
194
|
+
window.open = function(url, target, features) {
|
|
195
|
+
if (url) {
|
|
196
|
+
location.href = url;
|
|
197
|
+
return window;
|
|
198
|
+
}
|
|
199
|
+
return origOpen.call(this, url, target, features);
|
|
200
|
+
};
|
|
201
|
+
} catch {}
|
|
202
|
+
|
|
203
|
+
// Rewrite target="_blank" on existing and future links
|
|
204
|
+
try {
|
|
205
|
+
const rewrite = (el) => {
|
|
206
|
+
if (el.tagName === 'A' && el.target === '_blank') {
|
|
207
|
+
el.target = '_self';
|
|
208
|
+
}
|
|
209
|
+
};
|
|
210
|
+
// Catch links already in the DOM
|
|
211
|
+
document.addEventListener('DOMContentLoaded', () => {
|
|
212
|
+
document.querySelectorAll('a[target="_blank"]').forEach(rewrite);
|
|
213
|
+
});
|
|
214
|
+
// Catch dynamically added links
|
|
215
|
+
new MutationObserver((mutations) => {
|
|
216
|
+
for (const m of mutations) {
|
|
217
|
+
for (const node of m.addedNodes) {
|
|
218
|
+
if (node.nodeType !== 1) continue;
|
|
219
|
+
rewrite(node);
|
|
220
|
+
if (node.querySelectorAll) {
|
|
221
|
+
node.querySelectorAll('a[target="_blank"]').forEach(rewrite);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
}).observe(document.documentElement, { childList: true, subtree: true });
|
|
226
|
+
} catch {}
|
|
227
|
+
})();
|
|
228
|
+
`;
|
|
189
229
|
var BrowserSession = class {
|
|
190
230
|
config;
|
|
191
231
|
sessionId;
|
|
@@ -264,6 +304,20 @@ var BrowserSession = class {
|
|
|
264
304
|
console.warn("[poncho][browser] Failed to install stealth init script:", err?.message ?? err);
|
|
265
305
|
}
|
|
266
306
|
}
|
|
307
|
+
/**
|
|
308
|
+
* Force all new-tab navigations (window.open, target="_blank") to open
|
|
309
|
+
* in the current tab instead. Agents operate on a single tab at a time
|
|
310
|
+
* and can't see or interact with popups.
|
|
311
|
+
*/
|
|
312
|
+
async installSameTabScript(mgr) {
|
|
313
|
+
const ctx = mgr.getContext();
|
|
314
|
+
if (!ctx) return;
|
|
315
|
+
try {
|
|
316
|
+
await ctx.addInitScript({ content: SAME_TAB_INIT_SCRIPT });
|
|
317
|
+
} catch (err) {
|
|
318
|
+
console.warn("[poncho][browser] Failed to install same-tab init script:", err?.message ?? err);
|
|
319
|
+
}
|
|
320
|
+
}
|
|
267
321
|
/**
|
|
268
322
|
* Override the user-agent via CDP on the current page target.
|
|
269
323
|
* CDP Network.setUserAgentOverride is per-target, so call per-tab.
|
|
@@ -310,6 +364,7 @@ var BrowserSession = class {
|
|
|
310
364
|
if (this.stealthEnabled) {
|
|
311
365
|
await this.installContextStealth(mgr);
|
|
312
366
|
}
|
|
367
|
+
await this.installSameTabScript(mgr);
|
|
313
368
|
try {
|
|
314
369
|
const cdp = await mgr.getCDPSession();
|
|
315
370
|
await cdp.send("Debugger.disable");
|
|
@@ -558,6 +613,30 @@ var BrowserSession = class {
|
|
|
558
613
|
this.unlock();
|
|
559
614
|
}
|
|
560
615
|
}
|
|
616
|
+
async clickText(conversationId, text, exact) {
|
|
617
|
+
await this.lock();
|
|
618
|
+
try {
|
|
619
|
+
const mgr = await this.ensureManager();
|
|
620
|
+
const tab = await this.switchToConversation(mgr, conversationId);
|
|
621
|
+
const selector = exact ? `text="${text}"` : `text=${text}`;
|
|
622
|
+
const locator = mgr.getLocator(selector);
|
|
623
|
+
await locator.click();
|
|
624
|
+
tab.url = mgr.getPage().url();
|
|
625
|
+
} finally {
|
|
626
|
+
this.unlock();
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
async executeJs(conversationId, script) {
|
|
630
|
+
await this.lock();
|
|
631
|
+
try {
|
|
632
|
+
const mgr = await this.ensureManager();
|
|
633
|
+
await this.switchToConversation(mgr, conversationId);
|
|
634
|
+
const page = mgr.getPage();
|
|
635
|
+
return await page.evaluate(script);
|
|
636
|
+
} finally {
|
|
637
|
+
this.unlock();
|
|
638
|
+
}
|
|
639
|
+
}
|
|
561
640
|
async closeTab(conversationId) {
|
|
562
641
|
await this.lock();
|
|
563
642
|
try {
|
|
@@ -907,6 +986,53 @@ function createBrowserTools(getSession, getConversationId) {
|
|
|
907
986
|
return { clicked: ref };
|
|
908
987
|
}
|
|
909
988
|
},
|
|
989
|
+
{
|
|
990
|
+
name: "browser_click_text",
|
|
991
|
+
description: "Click the first visible element on the page that contains the given text. Use this when an element doesn't appear in the snapshot \u2014 e.g. styled divs acting as buttons. By default matches substring (case-insensitive); set exact=true for exact text match.",
|
|
992
|
+
inputSchema: {
|
|
993
|
+
type: "object",
|
|
994
|
+
properties: {
|
|
995
|
+
text: {
|
|
996
|
+
type: "string",
|
|
997
|
+
description: "The visible text of the element to click"
|
|
998
|
+
},
|
|
999
|
+
exact: {
|
|
1000
|
+
type: "boolean",
|
|
1001
|
+
description: "If true, match the exact full text (case-sensitive). Default: false (substring, case-insensitive)."
|
|
1002
|
+
}
|
|
1003
|
+
},
|
|
1004
|
+
required: ["text"]
|
|
1005
|
+
},
|
|
1006
|
+
handler: async (input) => {
|
|
1007
|
+
const session = getSession();
|
|
1008
|
+
const text = String(input.text ?? "");
|
|
1009
|
+
if (!text) throw new Error("text is required");
|
|
1010
|
+
const exact = input.exact === true;
|
|
1011
|
+
await session.clickText(getConversationId(), text, exact);
|
|
1012
|
+
return { clicked: text, exact };
|
|
1013
|
+
}
|
|
1014
|
+
},
|
|
1015
|
+
{
|
|
1016
|
+
name: "browser_execute_js",
|
|
1017
|
+
description: "Execute JavaScript in the current page context and return the result. Use this to inspect or interact with the DOM when snapshot refs aren't available \u2014 e.g. finding elements by text content, getting bounding boxes, or clicking elements by selector. The script is evaluated via page.evaluate(); return a value to get it back.",
|
|
1018
|
+
inputSchema: {
|
|
1019
|
+
type: "object",
|
|
1020
|
+
properties: {
|
|
1021
|
+
script: {
|
|
1022
|
+
type: "string",
|
|
1023
|
+
description: "JavaScript code to evaluate in the page. Use a return statement or expression to get a result back."
|
|
1024
|
+
}
|
|
1025
|
+
},
|
|
1026
|
+
required: ["script"]
|
|
1027
|
+
},
|
|
1028
|
+
handler: async (input) => {
|
|
1029
|
+
const session = getSession();
|
|
1030
|
+
const script = String(input.script ?? "");
|
|
1031
|
+
if (!script) throw new Error("script is required");
|
|
1032
|
+
const result = await session.executeJs(getConversationId(), script);
|
|
1033
|
+
return { result: result ?? null };
|
|
1034
|
+
}
|
|
1035
|
+
},
|
|
910
1036
|
{
|
|
911
1037
|
name: "browser_type",
|
|
912
1038
|
description: "Type text into a form field identified by its ref from the last snapshot. This clears the field first, then types the new value.",
|
package/package.json
CHANGED
package/src/session.ts
CHANGED
|
@@ -62,6 +62,51 @@ async function getBrowserManagerCtor(): Promise<new () => BrowserManagerInstance
|
|
|
62
62
|
|
|
63
63
|
const MAX_TABS = 8;
|
|
64
64
|
|
|
65
|
+
/**
|
|
66
|
+
* Init script that forces new-tab navigations (window.open, target="_blank")
|
|
67
|
+
* to open in the current tab. Runs before page scripts on every navigation.
|
|
68
|
+
*/
|
|
69
|
+
const SAME_TAB_INIT_SCRIPT = `
|
|
70
|
+
(() => {
|
|
71
|
+
// Override window.open to navigate in-place
|
|
72
|
+
try {
|
|
73
|
+
const origOpen = window.open;
|
|
74
|
+
window.open = function(url, target, features) {
|
|
75
|
+
if (url) {
|
|
76
|
+
location.href = url;
|
|
77
|
+
return window;
|
|
78
|
+
}
|
|
79
|
+
return origOpen.call(this, url, target, features);
|
|
80
|
+
};
|
|
81
|
+
} catch {}
|
|
82
|
+
|
|
83
|
+
// Rewrite target="_blank" on existing and future links
|
|
84
|
+
try {
|
|
85
|
+
const rewrite = (el) => {
|
|
86
|
+
if (el.tagName === 'A' && el.target === '_blank') {
|
|
87
|
+
el.target = '_self';
|
|
88
|
+
}
|
|
89
|
+
};
|
|
90
|
+
// Catch links already in the DOM
|
|
91
|
+
document.addEventListener('DOMContentLoaded', () => {
|
|
92
|
+
document.querySelectorAll('a[target="_blank"]').forEach(rewrite);
|
|
93
|
+
});
|
|
94
|
+
// Catch dynamically added links
|
|
95
|
+
new MutationObserver((mutations) => {
|
|
96
|
+
for (const m of mutations) {
|
|
97
|
+
for (const node of m.addedNodes) {
|
|
98
|
+
if (node.nodeType !== 1) continue;
|
|
99
|
+
rewrite(node);
|
|
100
|
+
if (node.querySelectorAll) {
|
|
101
|
+
node.querySelectorAll('a[target="_blank"]').forEach(rewrite);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}).observe(document.documentElement, { childList: true, subtree: true });
|
|
106
|
+
} catch {}
|
|
107
|
+
})();
|
|
108
|
+
`;
|
|
109
|
+
|
|
65
110
|
// Per-conversation tab state
|
|
66
111
|
interface ConversationTab {
|
|
67
112
|
tabIndex: number;
|
|
@@ -163,6 +208,21 @@ export class BrowserSession {
|
|
|
163
208
|
}
|
|
164
209
|
}
|
|
165
210
|
|
|
211
|
+
/**
|
|
212
|
+
* Force all new-tab navigations (window.open, target="_blank") to open
|
|
213
|
+
* in the current tab instead. Agents operate on a single tab at a time
|
|
214
|
+
* and can't see or interact with popups.
|
|
215
|
+
*/
|
|
216
|
+
private async installSameTabScript(mgr: BrowserManagerInstance): Promise<void> {
|
|
217
|
+
const ctx = mgr.getContext();
|
|
218
|
+
if (!ctx) return;
|
|
219
|
+
try {
|
|
220
|
+
await ctx.addInitScript({ content: SAME_TAB_INIT_SCRIPT });
|
|
221
|
+
} catch (err) {
|
|
222
|
+
console.warn("[poncho][browser] Failed to install same-tab init script:", (err as Error)?.message ?? err);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
166
226
|
/**
|
|
167
227
|
* Override the user-agent via CDP on the current page target.
|
|
168
228
|
* CDP Network.setUserAgentOverride is per-target, so call per-tab.
|
|
@@ -219,6 +279,9 @@ export class BrowserSession {
|
|
|
219
279
|
await this.installContextStealth(mgr);
|
|
220
280
|
}
|
|
221
281
|
|
|
282
|
+
// Redirect new-tab navigations into the current tab
|
|
283
|
+
await this.installSameTabScript(mgr);
|
|
284
|
+
|
|
222
285
|
try {
|
|
223
286
|
const cdp = await mgr.getCDPSession();
|
|
224
287
|
await cdp.send("Debugger.disable");
|
|
@@ -475,6 +538,32 @@ export class BrowserSession {
|
|
|
475
538
|
}
|
|
476
539
|
}
|
|
477
540
|
|
|
541
|
+
async clickText(conversationId: string, text: string, exact?: boolean): Promise<void> {
|
|
542
|
+
await this.lock();
|
|
543
|
+
try {
|
|
544
|
+
const mgr = await this.ensureManager();
|
|
545
|
+
const tab = await this.switchToConversation(mgr, conversationId);
|
|
546
|
+
const selector = exact ? `text="${text}"` : `text=${text}`;
|
|
547
|
+
const locator = mgr.getLocator(selector);
|
|
548
|
+
await locator.click();
|
|
549
|
+
tab.url = mgr.getPage().url();
|
|
550
|
+
} finally {
|
|
551
|
+
this.unlock();
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
async executeJs(conversationId: string, script: string): Promise<unknown> {
|
|
556
|
+
await this.lock();
|
|
557
|
+
try {
|
|
558
|
+
const mgr = await this.ensureManager();
|
|
559
|
+
await this.switchToConversation(mgr, conversationId);
|
|
560
|
+
const page = mgr.getPage();
|
|
561
|
+
return await page.evaluate(script);
|
|
562
|
+
} finally {
|
|
563
|
+
this.unlock();
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
|
|
478
567
|
async closeTab(conversationId: string): Promise<void> {
|
|
479
568
|
await this.lock();
|
|
480
569
|
try {
|
package/src/tools.ts
CHANGED
|
@@ -73,6 +73,62 @@ export function createBrowserTools(
|
|
|
73
73
|
return { clicked: ref };
|
|
74
74
|
},
|
|
75
75
|
},
|
|
76
|
+
{
|
|
77
|
+
name: "browser_click_text",
|
|
78
|
+
description:
|
|
79
|
+
"Click the first visible element on the page that contains the given text. " +
|
|
80
|
+
"Use this when an element doesn't appear in the snapshot — e.g. styled divs acting as buttons. " +
|
|
81
|
+
"By default matches substring (case-insensitive); set exact=true for exact text match.",
|
|
82
|
+
inputSchema: {
|
|
83
|
+
type: "object",
|
|
84
|
+
properties: {
|
|
85
|
+
text: {
|
|
86
|
+
type: "string",
|
|
87
|
+
description: "The visible text of the element to click",
|
|
88
|
+
},
|
|
89
|
+
exact: {
|
|
90
|
+
type: "boolean",
|
|
91
|
+
description:
|
|
92
|
+
"If true, match the exact full text (case-sensitive). Default: false (substring, case-insensitive).",
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
required: ["text"],
|
|
96
|
+
},
|
|
97
|
+
handler: async (input: BrowserToolInput) => {
|
|
98
|
+
const session = getSession();
|
|
99
|
+
const text = String(input.text ?? "");
|
|
100
|
+
if (!text) throw new Error("text is required");
|
|
101
|
+
const exact = input.exact === true;
|
|
102
|
+
await session.clickText(getConversationId(), text, exact);
|
|
103
|
+
return { clicked: text, exact };
|
|
104
|
+
},
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
name: "browser_execute_js",
|
|
108
|
+
description:
|
|
109
|
+
"Execute JavaScript in the current page context and return the result. " +
|
|
110
|
+
"Use this to inspect or interact with the DOM when snapshot refs aren't available — " +
|
|
111
|
+
"e.g. finding elements by text content, getting bounding boxes, or clicking elements by selector. " +
|
|
112
|
+
"The script is evaluated via page.evaluate(); return a value to get it back.",
|
|
113
|
+
inputSchema: {
|
|
114
|
+
type: "object",
|
|
115
|
+
properties: {
|
|
116
|
+
script: {
|
|
117
|
+
type: "string",
|
|
118
|
+
description:
|
|
119
|
+
"JavaScript code to evaluate in the page. Use a return statement or expression to get a result back.",
|
|
120
|
+
},
|
|
121
|
+
},
|
|
122
|
+
required: ["script"],
|
|
123
|
+
},
|
|
124
|
+
handler: async (input: BrowserToolInput) => {
|
|
125
|
+
const session = getSession();
|
|
126
|
+
const script = String(input.script ?? "");
|
|
127
|
+
if (!script) throw new Error("script is required");
|
|
128
|
+
const result = await session.executeJs(getConversationId(), script);
|
|
129
|
+
return { result: result ?? null };
|
|
130
|
+
},
|
|
131
|
+
},
|
|
76
132
|
{
|
|
77
133
|
name: "browser_type",
|
|
78
134
|
description:
|