zan-browser 1.3.37 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai.d.ts +3 -16
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +101 -208
- package/dist/ai.js.map +1 -1
- package/dist/blacklist.d.ts +2 -0
- package/dist/blacklist.d.ts.map +1 -0
- package/dist/blacklist.js +115 -0
- package/dist/blacklist.js.map +1 -0
- package/dist/browser-provider.d.ts +81 -0
- package/dist/browser-provider.d.ts.map +1 -0
- package/dist/browser-provider.js +6 -0
- package/dist/browser-provider.js.map +1 -0
- package/dist/browser.d.ts +1 -1
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +51 -60
- package/dist/browser.js.map +1 -1
- package/dist/index.d.ts +7 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/interceptor.d.ts +3 -3
- package/dist/interceptor.d.ts.map +1 -1
- package/dist/interceptor.js +45 -19
- package/dist/interceptor.js.map +1 -1
- package/dist/navigator.d.ts +40 -0
- package/dist/navigator.d.ts.map +1 -0
- package/dist/navigator.js +507 -0
- package/dist/navigator.js.map +1 -0
- package/dist/observer.d.ts +23 -3
- package/dist/observer.d.ts.map +1 -1
- package/dist/observer.js +310 -270
- package/dist/observer.js.map +1 -1
- package/dist/perception.d.ts +42 -0
- package/dist/perception.d.ts.map +1 -0
- package/dist/perception.js +140 -0
- package/dist/perception.js.map +1 -0
- package/dist/providers/browserbase.d.ts +12 -0
- package/dist/providers/browserbase.d.ts.map +1 -0
- package/dist/providers/browserbase.js +226 -0
- package/dist/providers/browserbase.js.map +1 -0
- package/dist/providers/puppeteer-local.d.ts +5 -0
- package/dist/providers/puppeteer-local.d.ts.map +1 -0
- package/dist/providers/puppeteer-local.js +218 -0
- package/dist/providers/puppeteer-local.js.map +1 -0
- package/dist/session.d.ts +17 -23
- package/dist/session.d.ts.map +1 -1
- package/dist/session.js +112 -401
- package/dist/session.js.map +1 -1
- package/dist/types.d.ts +1 -32
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -2
package/dist/session.js
CHANGED
|
@@ -5,56 +5,43 @@ const interceptor_1 = require("./interceptor");
|
|
|
5
5
|
const observer_1 = require("./observer");
|
|
6
6
|
class Session {
|
|
7
7
|
page;
|
|
8
|
-
|
|
8
|
+
browserHandle;
|
|
9
9
|
interceptor;
|
|
10
10
|
observer;
|
|
11
11
|
ai;
|
|
12
12
|
defaultTimeout;
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
constructor(page, browser, ai, timeout = 30_000, viewerUrl) {
|
|
13
|
+
viewerUrl;
|
|
14
|
+
constructor(page, browserHandle, ai, timeout = 30_000, viewerUrl) {
|
|
16
15
|
this.page = page;
|
|
17
|
-
this.
|
|
16
|
+
this.browserHandle = browserHandle;
|
|
18
17
|
this.ai = ai;
|
|
19
18
|
this.defaultTimeout = timeout;
|
|
20
19
|
this.viewerUrl = viewerUrl;
|
|
21
20
|
this.interceptor = new interceptor_1.Interceptor(page);
|
|
22
21
|
this.observer = new observer_1.Observer(page);
|
|
23
22
|
// Start capturing traffic immediately — t=0
|
|
24
|
-
// This catches requests the site fires on load before we do anything
|
|
25
23
|
this.interceptor.start();
|
|
26
24
|
}
|
|
25
|
+
// ─── Accessors for SmartNavigator ───────────────────────────────────────────
|
|
26
|
+
getPage() { return this.page; }
|
|
27
|
+
getInterceptor() { return this.interceptor; }
|
|
28
|
+
getObserver() { return this.observer; }
|
|
29
|
+
getAI() { return this.ai; }
|
|
27
30
|
// ─── Navigation ───────────────────────────────────────────────────────────────
|
|
28
31
|
async goto(url, options) {
|
|
29
32
|
const timeout = options?.timeout ?? this.defaultTimeout;
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
});
|
|
35
|
-
}
|
|
36
|
-
catch (err) {
|
|
37
|
-
// On timeout, don't destroy the session — wait for network idle on the
|
|
38
|
-
// partially loaded page, then continue if the URL actually changed.
|
|
39
|
-
try {
|
|
40
|
-
await this.page.waitForLoadState("networkidle", { timeout: 5_000 });
|
|
41
|
-
}
|
|
42
|
-
catch {
|
|
43
|
-
// networkidle also timed out — that's fine, continue with what we have
|
|
44
|
-
}
|
|
45
|
-
const currentUrl = this.page.url();
|
|
46
|
-
if (currentUrl === "about:blank" || currentUrl === "") {
|
|
47
|
-
throw new Error(`Navigation to ${url} failed: page is still on about:blank — ${err.message}`);
|
|
48
|
-
}
|
|
49
|
-
// Page URL changed — partial load is usable, continue
|
|
50
|
-
}
|
|
33
|
+
await this.page.goto(url, {
|
|
34
|
+
waitUntil: "domcontentloaded",
|
|
35
|
+
timeout,
|
|
36
|
+
});
|
|
51
37
|
// Wait for network to go idle — catches XHR that SPAs fire after DOM is parsed.
|
|
52
38
|
try {
|
|
53
|
-
await this.page.
|
|
39
|
+
await this.page.waitForNetworkIdle({ timeout: 8_000 });
|
|
54
40
|
}
|
|
55
41
|
catch {
|
|
56
42
|
// networkidle timed out — page likely has background polling, continue anyway
|
|
57
43
|
}
|
|
44
|
+
// Minimum 800ms after networkidle (or timeout) — some SPAs fire XHR slightly after idle
|
|
58
45
|
await this.page.waitForTimeout(800);
|
|
59
46
|
}
|
|
60
47
|
async wait(ms) {
|
|
@@ -65,21 +52,28 @@ class Session {
|
|
|
65
52
|
}
|
|
66
53
|
// ─── Observe — always first, before any action ────────────────────────────────
|
|
67
54
|
async observe() {
|
|
68
|
-
this.lastInteractiveElements.clear();
|
|
69
55
|
return this.observer.observe();
|
|
70
56
|
}
|
|
71
|
-
// ───
|
|
57
|
+
// ─── Form and autocomplete detection ────────────────────────────────────────
|
|
58
|
+
async detectForms() {
|
|
59
|
+
return this.observer.detectForms();
|
|
60
|
+
}
|
|
61
|
+
async detectAutocomplete() {
|
|
62
|
+
return this.observer.detectAutocomplete();
|
|
63
|
+
}
|
|
64
|
+
async detectErrors() {
|
|
65
|
+
return this.observer.detectErrors();
|
|
66
|
+
}
|
|
67
|
+
// ─── Screenshot ────────────────────────────────────────────────────────────────
|
|
72
68
|
async screenshot() {
|
|
73
|
-
const
|
|
69
|
+
const result = await this.page.screenshot({ type: "jpeg", quality: 80, encoding: "base64" });
|
|
74
70
|
return {
|
|
75
|
-
base64:
|
|
71
|
+
base64: typeof result === "string" ? result : result.toString("base64"),
|
|
76
72
|
mimeType: "image/jpeg",
|
|
77
73
|
timestamp: Date.now(),
|
|
78
74
|
};
|
|
79
75
|
}
|
|
80
76
|
// ─── DOM text extraction — fallback when no useful XHR is captured ────────────
|
|
81
|
-
// Strips chrome (nav, footer, scripts) and returns visible text capped at 15k chars.
|
|
82
|
-
// Used by web-agent when the page renders data server-side with no API calls.
|
|
83
77
|
async extractPageContent() {
|
|
84
78
|
return await this.page.evaluate(() => {
|
|
85
79
|
const clone = document.body.cloneNode(true);
|
|
@@ -93,344 +87,99 @@ class Session {
|
|
|
93
87
|
.slice(0, 15000);
|
|
94
88
|
});
|
|
95
89
|
}
|
|
96
|
-
// ───
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
const clone = document.body.cloneNode(true);
|
|
102
|
-
clone.querySelectorAll("script,style,nav,footer,header")
|
|
103
|
-
.forEach((el) => el.remove());
|
|
104
|
-
const content = clone.innerText
|
|
105
|
-
.split("\n")
|
|
106
|
-
.map((l) => l.trim())
|
|
107
|
-
.filter((l) => l.length > 0)
|
|
108
|
-
.join("\n")
|
|
109
|
-
.slice(0, 12000);
|
|
110
|
-
const links = [];
|
|
111
|
-
const seen = new Set();
|
|
112
|
-
document.querySelectorAll("a[href]").forEach((a) => {
|
|
113
|
-
const href = a.href;
|
|
114
|
-
const text = a.innerText.trim().slice(0, 100);
|
|
115
|
-
if (href && text && !seen.has(href) && href.startsWith("http")) {
|
|
116
|
-
seen.add(href);
|
|
117
|
-
links.push({ href, text });
|
|
118
|
-
}
|
|
119
|
-
});
|
|
120
|
-
return { content, links: links.slice(0, 80) };
|
|
121
|
-
});
|
|
122
|
-
}
|
|
123
|
-
// ─── Interactive DOM observation — full interactive element scan ─────────────
|
|
124
|
-
// Replaces extractPageWithLinks() for agent navigation: returns ALL interactive
|
|
125
|
-
// elements (buttons, menus, dropdowns, inputs) not just anchor links.
|
|
126
|
-
// Sets data-rid attributes on matched elements so clickById() can target them.
|
|
127
|
-
async observeInteractiveDom() {
|
|
128
|
-
const INTERACTIVE_SEL = [
|
|
129
|
-
"button", "[role='button']", "[role='switch']",
|
|
130
|
-
"input:not([type='hidden'])",
|
|
131
|
-
"a[href]", "select", "textarea",
|
|
132
|
-
"[role='menuitem']", "[role='option']", "[role='tab']", "[role='combobox']",
|
|
133
|
-
"[onclick]", "[tabindex]:not([tabindex='-1'])",
|
|
134
|
-
].join(", ");
|
|
135
|
-
const result = await this.page.evaluate((interactiveSel) => {
|
|
136
|
-
// Clean previous markers
|
|
137
|
-
document.querySelectorAll("[data-rid]").forEach((el) => el.removeAttribute("data-rid"));
|
|
138
|
-
const isVisible = (el) => {
|
|
139
|
-
const htmlEl = el;
|
|
140
|
-
// offsetParent is null when the element or any ancestor has display:none.
|
|
141
|
-
// Exception: <body>, <html>, and position:fixed/sticky elements legitimately have offsetParent === null.
|
|
142
|
-
if (htmlEl.offsetParent === null) {
|
|
143
|
-
const style = window.getComputedStyle(el);
|
|
144
|
-
const pos = style.position;
|
|
145
|
-
if (pos !== "fixed" && pos !== "sticky" && el.tagName !== "BODY" && el.tagName !== "HTML") {
|
|
146
|
-
return false;
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
const style = window.getComputedStyle(el);
|
|
150
|
-
if (style.visibility === "hidden" || style.opacity === "0")
|
|
151
|
-
return false;
|
|
152
|
-
const rect = el.getBoundingClientRect();
|
|
153
|
-
if (rect.width === 0 || rect.height === 0)
|
|
154
|
-
return false;
|
|
155
|
-
// Check if element is within viewport bounds
|
|
156
|
-
const vh = window.innerHeight;
|
|
157
|
-
const vw = window.innerWidth;
|
|
158
|
-
return rect.bottom > 0 && rect.top < vh && rect.right > 0 && rect.left < vw;
|
|
159
|
-
};
|
|
160
|
-
const truncate = (v, max = 60) => v.length > max ? v.slice(0, max) : v;
|
|
161
|
-
const resolveUrl = (href) => {
|
|
162
|
-
if (!href)
|
|
163
|
-
return href;
|
|
164
|
-
try {
|
|
165
|
-
return new URL(href, window.location.href).href;
|
|
166
|
-
}
|
|
167
|
-
catch {
|
|
168
|
-
return href;
|
|
169
|
-
}
|
|
170
|
-
};
|
|
171
|
-
const ATTR_KEYS = [
|
|
172
|
-
"title", "type", "name", "role", "aria-label",
|
|
173
|
-
"placeholder", "value", "alt", "href", "aria-expanded",
|
|
174
|
-
];
|
|
175
|
-
const ATTR_RENDER_ORDER = [
|
|
176
|
-
"title", "type", "name", "role", "aria_label",
|
|
177
|
-
"placeholder", "value", "alt", "href", "aria_expanded",
|
|
178
|
-
];
|
|
179
|
-
const getAttrs = (el) => {
|
|
180
|
-
const out = {};
|
|
181
|
-
for (const attr of ATTR_KEYS) {
|
|
182
|
-
let val = el.getAttribute(attr);
|
|
183
|
-
if (val == null || val === "")
|
|
184
|
-
continue;
|
|
185
|
-
if (attr === "href")
|
|
186
|
-
val = resolveUrl(val);
|
|
187
|
-
const key = attr.replace(/-/g, "_");
|
|
188
|
-
out[key] = truncate(val);
|
|
189
|
-
}
|
|
190
|
-
if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement)
|
|
191
|
-
&& el.value) {
|
|
192
|
-
out["value"] = truncate(el.value);
|
|
193
|
-
}
|
|
194
|
-
return out;
|
|
195
|
-
};
|
|
196
|
-
const getInnerText = (el) => (el.textContent ?? "").replace(/\s+/g, " ").trim().slice(0, 100);
|
|
197
|
-
// B — buttons, checkboxes, radios, switches, role="button"
|
|
198
|
-
// L — anchor links
|
|
199
|
-
// I — inputs, textareas, selects
|
|
200
|
-
// M — menuitem, option, tab, combobox, other interactables
|
|
201
|
-
const categorize = (el) => {
|
|
202
|
-
const tag = el.tagName.toLowerCase();
|
|
203
|
-
const role = el.getAttribute("role") ?? "";
|
|
204
|
-
const type = el.type ?? "";
|
|
205
|
-
if (tag === "a" && el.hasAttribute("href"))
|
|
206
|
-
return "L";
|
|
207
|
-
if (tag === "textarea" || tag === "select")
|
|
208
|
-
return "I";
|
|
209
|
-
if (tag === "input" && !["submit", "button", "checkbox", "radio", "hidden"].includes(type))
|
|
210
|
-
return "I";
|
|
211
|
-
if (tag === "button")
|
|
212
|
-
return "B";
|
|
213
|
-
if (tag === "input" && ["submit", "button", "checkbox", "radio"].includes(type))
|
|
214
|
-
return "B";
|
|
215
|
-
if (role === "button" || role === "switch")
|
|
216
|
-
return "B";
|
|
217
|
-
if (["menuitem", "option", "tab", "combobox"].includes(role))
|
|
218
|
-
return "M";
|
|
219
|
-
return "M";
|
|
220
|
-
};
|
|
221
|
-
const interactiveSet = new Set();
|
|
222
|
-
document.querySelectorAll(interactiveSel).forEach((el) => interactiveSet.add(el));
|
|
223
|
-
// Only include text near interactive elements — headings, labels, or
|
|
224
|
-
// nodes whose parent/grandparent contains an interactive child.
|
|
225
|
-
const isNearInteractive = (parent) => {
|
|
226
|
-
if (!parent)
|
|
227
|
-
return false;
|
|
228
|
-
const tag = parent.tagName.toLowerCase();
|
|
229
|
-
if (["h1", "h2", "h3", "h4", "h5", "h6", "label", "legend", "figcaption", "caption", "th", "li"].includes(tag))
|
|
230
|
-
return true;
|
|
231
|
-
try {
|
|
232
|
-
if (parent.querySelector(interactiveSel))
|
|
233
|
-
return true;
|
|
234
|
-
if (parent.parentElement?.querySelector(interactiveSel))
|
|
235
|
-
return true;
|
|
236
|
-
}
|
|
237
|
-
catch { /* selector might fail on exotic DOMs */ }
|
|
238
|
-
return false;
|
|
239
|
-
};
|
|
240
|
-
const SELF_CLOSING = new Set(["input", "img", "br", "hr"]);
|
|
241
|
-
const lines = [];
|
|
242
|
-
const elements = [];
|
|
243
|
-
const counters = { B: 0, L: 0, I: 0, M: 0 };
|
|
244
|
-
const walk = (node) => {
|
|
245
|
-
if (node.nodeType === Node.TEXT_NODE) {
|
|
246
|
-
const text = (node.textContent ?? "").replace(/\s+/g, " ").trim();
|
|
247
|
-
if (text.length < 2)
|
|
248
|
-
return;
|
|
249
|
-
const parent = node.parentElement;
|
|
250
|
-
if (!parent)
|
|
251
|
-
return;
|
|
252
|
-
if (parent.tagName === "SCRIPT" || parent.tagName === "STYLE" || parent.tagName === "NOSCRIPT")
|
|
253
|
-
return;
|
|
254
|
-
if (interactiveSet.has(parent))
|
|
255
|
-
return; // text goes inside the tag
|
|
256
|
-
if (!isVisible(parent))
|
|
257
|
-
return;
|
|
258
|
-
if (!isNearInteractive(parent))
|
|
259
|
-
return;
|
|
260
|
-
lines.push(`_[:] ${text.slice(0, 150)}`);
|
|
261
|
-
return;
|
|
262
|
-
}
|
|
263
|
-
if (node.nodeType !== Node.ELEMENT_NODE)
|
|
264
|
-
return;
|
|
265
|
-
const el = node;
|
|
266
|
-
const tag = el.tagName.toLowerCase();
|
|
267
|
-
if (tag === "script" || tag === "style" || tag === "noscript")
|
|
268
|
-
return;
|
|
269
|
-
if (interactiveSet.has(el)) {
|
|
270
|
-
if (!isVisible(el))
|
|
271
|
-
return;
|
|
272
|
-
const cat = categorize(el);
|
|
273
|
-
counters[cat]++;
|
|
274
|
-
const id = `${cat}${counters[cat]}`;
|
|
275
|
-
const attrs = getAttrs(el);
|
|
276
|
-
const text = getInnerText(el);
|
|
277
|
-
const htmlTag = tag === "a" ? "a" : tag;
|
|
278
|
-
// Mark element for Playwright selector lookup
|
|
279
|
-
el.setAttribute("data-rid", id);
|
|
280
|
-
const attrParts = [];
|
|
281
|
-
for (const key of ATTR_RENDER_ORDER) {
|
|
282
|
-
if (attrs[key])
|
|
283
|
-
attrParts.push(`${key}="${attrs[key]}"`);
|
|
284
|
-
}
|
|
285
|
-
const attrStr = attrParts.length > 0 ? " " + attrParts.join(" ") : "";
|
|
286
|
-
if (SELF_CLOSING.has(htmlTag)) {
|
|
287
|
-
lines.push(`${id}[:] <${htmlTag}${attrStr}/>`);
|
|
288
|
-
}
|
|
289
|
-
else {
|
|
290
|
-
lines.push(`${id}[:] <${htmlTag}${attrStr}>${text}</${htmlTag}>`);
|
|
291
|
-
}
|
|
292
|
-
elements.push({ id, selector: `[data-rid="${id}"]` });
|
|
293
|
-
return; // don't recurse into interactive elements
|
|
294
|
-
}
|
|
295
|
-
for (const child of Array.from(el.childNodes)) {
|
|
296
|
-
walk(child);
|
|
297
|
-
}
|
|
298
|
-
};
|
|
299
|
-
walk(document.body);
|
|
300
|
-
return { lines, elements };
|
|
301
|
-
}, INTERACTIVE_SEL);
|
|
302
|
-
this.lastInteractiveElements = new Map(result.elements.map((e) => [e.id, e.selector]));
|
|
303
|
-
return {
|
|
304
|
-
dom: result.lines.join("\n"),
|
|
305
|
-
elements: result.elements,
|
|
306
|
-
};
|
|
307
|
-
}
|
|
308
|
-
// ─── Intelligent DOM scrape — Notte-style content understanding ──────────────
|
|
309
|
-
// Passes visible page text to Claude Sonnet and asks whether the goal data is present.
|
|
310
|
-
// If not found, follows nextUrl suggestions up to maxHops times before giving up.
|
|
311
|
-
// Caller must supply initialContent (from extractPageContent()) to avoid a double call.
|
|
312
|
-
async scrapeWithGoal(goal, initialContent, seedValues, maxHops = 3, initialLinks) {
|
|
313
|
-
let content = initialContent;
|
|
314
|
-
let links = initialLinks;
|
|
315
|
-
for (let hop = 0; hop <= maxHops; hop++) {
|
|
316
|
-
const result = await this.ai.scrapePageContent(goal, content, seedValues, links);
|
|
317
|
-
if (result.found || !result.nextUrl || hop === maxHops) {
|
|
318
|
-
return result;
|
|
319
|
-
}
|
|
320
|
-
// Follow the suggested link and re-scrape
|
|
90
|
+
// ─── Interaction ──────────────────────────────────────────────────────────────
|
|
91
|
+
async clickById(elementId) {
|
|
92
|
+
// First try cached CSS selector (reliable)
|
|
93
|
+
const selector = this.observer.getSelector(elementId);
|
|
94
|
+
if (selector) {
|
|
321
95
|
try {
|
|
322
|
-
await this.
|
|
323
|
-
|
|
324
|
-
content = page.content;
|
|
325
|
-
links = page.links;
|
|
96
|
+
await this.page.click(selector);
|
|
97
|
+
return;
|
|
326
98
|
}
|
|
327
99
|
catch {
|
|
328
|
-
|
|
100
|
+
// Fall through to coordinate-based click
|
|
329
101
|
}
|
|
330
102
|
}
|
|
331
|
-
|
|
103
|
+
const coords = await this.observer.resolveElementId(elementId);
|
|
104
|
+
if (!coords)
|
|
105
|
+
throw new Error(`Element ${elementId} not found in DOM`);
|
|
106
|
+
await this.page.mouse.click(coords.x, coords.y);
|
|
332
107
|
}
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
async clickById(elementId) {
|
|
337
|
-
const pagesBefore = this.page.context().pages().length;
|
|
338
|
-
const selector = this.lastInteractiveElements.get(elementId);
|
|
108
|
+
async fillById(elementId, value) {
|
|
109
|
+
// First try cached CSS selector (reliable)
|
|
110
|
+
const selector = this.observer.getSelector(elementId);
|
|
339
111
|
if (selector) {
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
112
|
+
try {
|
|
113
|
+
await this.page.click(selector, { clickCount: 3 }); // select all
|
|
114
|
+
await this.page.keyboard.press("Backspace");
|
|
115
|
+
await this.page.type(selector, value, { delay: 80 });
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
catch {
|
|
119
|
+
// Fall through to coordinate-based fill
|
|
120
|
+
}
|
|
347
121
|
}
|
|
348
|
-
await this.switchToNewTabIfOpened(pagesBefore);
|
|
349
|
-
}
|
|
350
|
-
// Fill input by element ID from observe()
|
|
351
|
-
async fillById(elementId, value) {
|
|
352
122
|
const coords = await this.observer.resolveElementId(elementId);
|
|
353
123
|
if (!coords)
|
|
354
124
|
throw new Error(`Element ${elementId} not found in DOM`);
|
|
355
125
|
await this.page.mouse.click(coords.x, coords.y);
|
|
356
126
|
await this.page.keyboard.press("Control+a");
|
|
357
|
-
await this.page.keyboard.type(value);
|
|
127
|
+
await this.page.keyboard.type(value, { delay: 80 });
|
|
128
|
+
}
|
|
129
|
+
// ─── Direct selector interaction (for autocomplete clicks) ──────────────────
|
|
130
|
+
async clickSelector(selector) {
|
|
131
|
+
const el = await this.page.waitForSelector(selector, { timeout: 3_000, visible: true });
|
|
132
|
+
if (el) {
|
|
133
|
+
await el.click();
|
|
134
|
+
}
|
|
135
|
+
else {
|
|
136
|
+
throw new Error(`Selector ${selector} not found`);
|
|
137
|
+
}
|
|
358
138
|
}
|
|
359
139
|
// ─── Captured requests ────────────────────────────────────────────────────────
|
|
360
|
-
// All captured requests sorted by relevance score
|
|
361
140
|
getCapturedRequests() {
|
|
362
141
|
return this.interceptor.getAll();
|
|
363
142
|
}
|
|
364
|
-
// Only useful XHR/fetch requests above score threshold
|
|
365
143
|
getUsefulRequests(minScore = 1) {
|
|
366
144
|
return this.interceptor.getUseful(minScore);
|
|
367
145
|
}
|
|
368
|
-
// Only requests that fired as a direct result of a user action
|
|
369
146
|
getActionTriggeredRequests() {
|
|
370
147
|
return this.interceptor.getActionTriggered();
|
|
371
148
|
}
|
|
372
|
-
// Listen to requests in real-time as they come in
|
|
373
149
|
onRequest(handler) {
|
|
374
150
|
this.interceptor.on("request", handler);
|
|
375
151
|
}
|
|
152
|
+
hasSeedValueInResponses(seedValues) {
|
|
153
|
+
return this.interceptor.hasSeedValueInResponses(seedValues);
|
|
154
|
+
}
|
|
155
|
+
markActionTimestamp() {
|
|
156
|
+
this.interceptor.markActionTimestamp();
|
|
157
|
+
}
|
|
376
158
|
// ─── act() — achieve a goal through multi-step navigation ────────────────────
|
|
377
159
|
async act(goal, maxSteps = 10, seedValues) {
|
|
378
160
|
const steps = [];
|
|
379
|
-
const
|
|
380
|
-
let consecutiveFailures = 0;
|
|
381
|
-
const MAX_CONSECUTIVE_FAILURES = 3;
|
|
161
|
+
const history = [];
|
|
382
162
|
for (let i = 0; i < maxSteps; i++) {
|
|
383
|
-
// ALWAYS observe first — no screenshot by default
|
|
384
163
|
const observation = await this.observe();
|
|
385
|
-
let
|
|
164
|
+
let action;
|
|
386
165
|
let usedScreenshot = false;
|
|
387
|
-
// If observe() gives us enough elements, use text-based decision
|
|
388
166
|
if (observation.elements.filter((e) => e.visible).length > 0) {
|
|
389
|
-
|
|
167
|
+
action = await this.ai.decideFromObservation(goal, observation, history, seedValues);
|
|
390
168
|
}
|
|
391
169
|
else {
|
|
392
|
-
// Fallback: page has no readable elements (canvas, iframe, etc.) → screenshot
|
|
393
170
|
const shot = await this.screenshot();
|
|
394
|
-
|
|
171
|
+
action = await this.ai.decideFromScreenshot(goal, shot.base64, history, seedValues);
|
|
395
172
|
usedScreenshot = true;
|
|
396
173
|
}
|
|
397
|
-
const { action, state } = completion;
|
|
398
174
|
steps.push({ action, observeBefore: observation, usedScreenshot });
|
|
399
|
-
|
|
400
|
-
trajectory.push({
|
|
401
|
-
role: "assistant",
|
|
402
|
-
content: JSON.stringify(completion),
|
|
403
|
-
});
|
|
404
|
-
// Track consecutive failures
|
|
405
|
-
if (state.previous_goal_status === "failure") {
|
|
406
|
-
consecutiveFailures++;
|
|
407
|
-
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
408
|
-
return {
|
|
409
|
-
success: false,
|
|
410
|
-
steps,
|
|
411
|
-
finalUrl: this.url,
|
|
412
|
-
reason: `${MAX_CONSECUTIVE_FAILURES} consecutive failures: ${state.previous_goal_eval}`,
|
|
413
|
-
};
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
else if (state.previous_goal_status === "success") {
|
|
417
|
-
consecutiveFailures = 0;
|
|
418
|
-
}
|
|
419
|
-
// Terminal states
|
|
175
|
+
history.push(`Step ${i + 1}: ${action.type} — ${action.reason}`);
|
|
420
176
|
if (action.type === "done") {
|
|
421
177
|
return { success: true, steps, finalUrl: this.url, reason: action.reason };
|
|
422
178
|
}
|
|
423
179
|
if (action.type === "impossible") {
|
|
424
180
|
return { success: false, steps, finalUrl: this.url, reason: action.reason };
|
|
425
181
|
}
|
|
426
|
-
|
|
427
|
-
const actionSuccess = await this.executeAction(action);
|
|
428
|
-
// Add execution result to trajectory
|
|
429
|
-
const resultMsg = actionSuccess
|
|
430
|
-
? `Action '${action.type}' succeeded`
|
|
431
|
-
: `Action '${action.type}' failed`;
|
|
432
|
-
trajectory.push({ role: "user", content: resultMsg });
|
|
433
|
-
// Short wait after interactions for page to settle
|
|
182
|
+
await this.executeAction(action);
|
|
434
183
|
await this.page.waitForTimeout(500);
|
|
435
184
|
}
|
|
436
185
|
return {
|
|
@@ -441,10 +190,7 @@ class Session {
|
|
|
441
190
|
};
|
|
442
191
|
}
|
|
443
192
|
// ─── findData() — navigate + capture requests until goal achieved ─────────────
|
|
444
|
-
// seedValues: concrete example inputs to use during discovery (e.g. what to type in a search box)
|
|
445
193
|
async findData(goal, maxSteps = 15, seedValues) {
|
|
446
|
-
// Do NOT clear — requests captured during goto() (t=0) are valuable.
|
|
447
|
-
// goto() already waits for networkidle + 800ms minimum, so no additional delay needed here.
|
|
448
194
|
const actResult = await this.act(goal, maxSteps, seedValues);
|
|
449
195
|
const capturedRequests = this.interceptor.getUseful();
|
|
450
196
|
return {
|
|
@@ -455,84 +201,49 @@ class Session {
|
|
|
455
201
|
// ─── Cleanup ──────────────────────────────────────────────────────────────────
|
|
456
202
|
async close() {
|
|
457
203
|
this.interceptor.stop();
|
|
458
|
-
|
|
204
|
+
if (!this.page.isClosed()) {
|
|
205
|
+
await this.page.close();
|
|
206
|
+
}
|
|
459
207
|
}
|
|
460
208
|
async closeAll() {
|
|
461
209
|
this.interceptor.stop();
|
|
462
|
-
await this.
|
|
463
|
-
}
|
|
464
|
-
// ─── New tab detection ─────────────────────────────────────────────────────────
|
|
465
|
-
// After a click, check if a new tab was opened and switch the session to it.
|
|
466
|
-
// Rebuilds interceptor/observer so they reference the new page.
|
|
467
|
-
async switchToNewTabIfOpened(pagesBeforeCount) {
|
|
468
|
-
await this.page.waitForTimeout(1000);
|
|
469
|
-
const pages = this.page.context().pages();
|
|
470
|
-
if (pages.length <= pagesBeforeCount)
|
|
471
|
-
return;
|
|
472
|
-
// Switch to the newest tab
|
|
473
|
-
const newPage = pages[pages.length - 1];
|
|
474
|
-
if (newPage === this.page)
|
|
475
|
-
return;
|
|
476
|
-
// Tear down interceptor on old page, rebuild on new page
|
|
477
|
-
this.interceptor.stop();
|
|
478
|
-
this.page = newPage;
|
|
479
|
-
this.interceptor = new interceptor_1.Interceptor(this.page);
|
|
480
|
-
this.observer = new observer_1.Observer(this.page);
|
|
481
|
-
this.interceptor.start();
|
|
482
|
-
try {
|
|
483
|
-
await this.page.waitForLoadState("domcontentloaded", { timeout: this.defaultTimeout });
|
|
484
|
-
}
|
|
485
|
-
catch {
|
|
486
|
-
// Partial load — continue with what we have
|
|
487
|
-
}
|
|
210
|
+
await this.browserHandle.close();
|
|
488
211
|
}
|
|
489
212
|
// ─── Execute action ───────────────────────────────────────────────────────────
|
|
490
213
|
async executeAction(action) {
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
// Scrape is handled by the caller — no-op here
|
|
525
|
-
break;
|
|
526
|
-
case "screenshot":
|
|
527
|
-
// Already handled in act() loop
|
|
528
|
-
break;
|
|
529
|
-
default:
|
|
530
|
-
break;
|
|
531
|
-
}
|
|
532
|
-
return true;
|
|
533
|
-
}
|
|
534
|
-
catch {
|
|
535
|
-
return false;
|
|
214
|
+
switch (action.type) {
|
|
215
|
+
case "click":
|
|
216
|
+
this.interceptor.markActionTimestamp();
|
|
217
|
+
await this.clickById(action.elementId).catch(() => {
|
|
218
|
+
// If all resolution fails, log and continue
|
|
219
|
+
console.warn(`[Session] Click failed for ${action.elementId}`);
|
|
220
|
+
});
|
|
221
|
+
break;
|
|
222
|
+
case "fill":
|
|
223
|
+
this.interceptor.markActionTimestamp();
|
|
224
|
+
await this.fillById(action.elementId, action.value).catch(() => {
|
|
225
|
+
console.warn(`[Session] Fill failed for ${action.elementId}`);
|
|
226
|
+
});
|
|
227
|
+
break;
|
|
228
|
+
case "scroll":
|
|
229
|
+
this.interceptor.markActionTimestamp();
|
|
230
|
+
await this.page.evaluate(({ direction, amount }) => {
|
|
231
|
+
window.scrollBy(0, direction === "down" ? amount : -amount);
|
|
232
|
+
}, { direction: action.direction, amount: action.amount ?? 300 }).catch(() => {
|
|
233
|
+
// Execution context destroyed = page navigated mid-scroll
|
|
234
|
+
});
|
|
235
|
+
break;
|
|
236
|
+
case "navigate":
|
|
237
|
+
this.interceptor.markActionTimestamp();
|
|
238
|
+
await this.page.goto(action.url, { waitUntil: "domcontentloaded", timeout: this.defaultTimeout });
|
|
239
|
+
break;
|
|
240
|
+
case "wait":
|
|
241
|
+
await this.page.waitForTimeout(action.ms);
|
|
242
|
+
break;
|
|
243
|
+
case "screenshot":
|
|
244
|
+
break;
|
|
245
|
+
default:
|
|
246
|
+
break;
|
|
536
247
|
}
|
|
537
248
|
}
|
|
538
249
|
}
|