zan-browser 1.3.38 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai.d.ts +3 -16
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +101 -208
- package/dist/ai.js.map +1 -1
- package/dist/blacklist.d.ts +2 -0
- package/dist/blacklist.d.ts.map +1 -0
- package/dist/blacklist.js +115 -0
- package/dist/blacklist.js.map +1 -0
- package/dist/browser-provider.d.ts +81 -0
- package/dist/browser-provider.d.ts.map +1 -0
- package/dist/browser-provider.js +6 -0
- package/dist/browser-provider.js.map +1 -0
- package/dist/browser.d.ts +1 -1
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +57 -59
- package/dist/browser.js.map +1 -1
- package/dist/index.d.ts +7 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/interceptor.d.ts +6 -3
- package/dist/interceptor.d.ts.map +1 -1
- package/dist/interceptor.js +61 -21
- package/dist/interceptor.js.map +1 -1
- package/dist/navigator.d.ts +40 -0
- package/dist/navigator.d.ts.map +1 -0
- package/dist/navigator.js +506 -0
- package/dist/navigator.js.map +1 -0
- package/dist/observer.d.ts +23 -3
- package/dist/observer.d.ts.map +1 -1
- package/dist/observer.js +310 -270
- package/dist/observer.js.map +1 -1
- package/dist/perception.d.ts +42 -0
- package/dist/perception.d.ts.map +1 -0
- package/dist/perception.js +140 -0
- package/dist/perception.js.map +1 -0
- package/dist/providers/browserbase.d.ts +12 -0
- package/dist/providers/browserbase.d.ts.map +1 -0
- package/dist/providers/browserbase.js +226 -0
- package/dist/providers/browserbase.js.map +1 -0
- package/dist/providers/puppeteer-local.d.ts +5 -0
- package/dist/providers/puppeteer-local.d.ts.map +1 -0
- package/dist/providers/puppeteer-local.js +218 -0
- package/dist/providers/puppeteer-local.js.map +1 -0
- package/dist/session.d.ts +17 -23
- package/dist/session.d.ts.map +1 -1
- package/dist/session.js +123 -407
- package/dist/session.js.map +1 -1
- package/dist/types.d.ts +1 -32
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -2
package/dist/session.js
CHANGED
|
@@ -5,63 +5,44 @@ const interceptor_1 = require("./interceptor");
|
|
|
5
5
|
const observer_1 = require("./observer");
|
|
6
6
|
class Session {
|
|
7
7
|
page;
|
|
8
|
-
|
|
8
|
+
browserHandle;
|
|
9
9
|
interceptor;
|
|
10
10
|
observer;
|
|
11
11
|
ai;
|
|
12
12
|
defaultTimeout;
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
constructor(page, browser, ai, timeout = 30_000, viewerUrl) {
|
|
13
|
+
viewerUrl;
|
|
14
|
+
constructor(page, browserHandle, ai, timeout = 30_000, viewerUrl) {
|
|
16
15
|
this.page = page;
|
|
17
|
-
this.
|
|
16
|
+
this.browserHandle = browserHandle;
|
|
18
17
|
this.ai = ai;
|
|
19
18
|
this.defaultTimeout = timeout;
|
|
20
19
|
this.viewerUrl = viewerUrl;
|
|
21
20
|
this.interceptor = new interceptor_1.Interceptor(page);
|
|
22
21
|
this.observer = new observer_1.Observer(page);
|
|
23
22
|
// Start capturing traffic immediately — t=0
|
|
24
|
-
// This catches requests the site fires on load before we do anything
|
|
25
23
|
this.interceptor.start();
|
|
26
24
|
}
|
|
25
|
+
// ─── Accessors for SmartNavigator ───────────────────────────────────────────
|
|
26
|
+
getPage() { return this.page; }
|
|
27
|
+
getInterceptor() { return this.interceptor; }
|
|
28
|
+
getObserver() { return this.observer; }
|
|
29
|
+
getAI() { return this.ai; }
|
|
27
30
|
// ─── Navigation ───────────────────────────────────────────────────────────────
|
|
28
31
|
async goto(url, options) {
|
|
29
32
|
const timeout = options?.timeout ?? this.defaultTimeout;
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
timeout,
|
|
35
|
-
});
|
|
36
|
-
console.log(`[goto] domcontentloaded ok — url: ${this.page.url()}, tabs: ${this.page.context().pages().length}`);
|
|
37
|
-
}
|
|
38
|
-
catch (err) {
|
|
39
|
-
console.log(`[goto] page.goto failed: ${err.message}`);
|
|
40
|
-
// On timeout, don't destroy the session — wait for network idle on the
|
|
41
|
-
// partially loaded page, then continue if the URL actually changed.
|
|
42
|
-
try {
|
|
43
|
-
await this.page.waitForLoadState("networkidle", { timeout: 5_000 });
|
|
44
|
-
console.log(`[goto] networkidle fallback ok — url: ${this.page.url()}, tabs: ${this.page.context().pages().length}`);
|
|
45
|
-
}
|
|
46
|
-
catch (err2) {
|
|
47
|
-
console.log(`[goto] networkidle fallback also timed out: ${err2.message}`);
|
|
48
|
-
}
|
|
49
|
-
const currentUrl = this.page.url();
|
|
50
|
-
if (currentUrl === "about:blank" || currentUrl === "") {
|
|
51
|
-
console.log(`[goto] FATAL: still on about:blank — throwing`);
|
|
52
|
-
throw new Error(`Navigation to ${url} failed: page is still on about:blank — ${err.message}`);
|
|
53
|
-
}
|
|
54
|
-
console.log(`[goto] partial load accepted — url: ${currentUrl}, tabs: ${this.page.context().pages().length}`);
|
|
55
|
-
}
|
|
33
|
+
await this.page.goto(url, {
|
|
34
|
+
waitUntil: "domcontentloaded",
|
|
35
|
+
timeout,
|
|
36
|
+
});
|
|
56
37
|
// Wait for network to go idle — catches XHR that SPAs fire after DOM is parsed.
|
|
57
38
|
try {
|
|
58
|
-
await this.page.
|
|
39
|
+
await this.page.waitForNetworkIdle({ timeout: 8_000 });
|
|
59
40
|
}
|
|
60
41
|
catch {
|
|
61
42
|
// networkidle timed out — page likely has background polling, continue anyway
|
|
62
43
|
}
|
|
44
|
+
// Minimum 800ms after networkidle (or timeout) — some SPAs fire XHR slightly after idle
|
|
63
45
|
await this.page.waitForTimeout(800);
|
|
64
|
-
console.log(`[goto] done — final url: ${this.page.url()}, tabs: ${this.page.context().pages().length}`);
|
|
65
46
|
}
|
|
66
47
|
async wait(ms) {
|
|
67
48
|
await this.page.waitForTimeout(ms);
|
|
@@ -71,21 +52,28 @@ class Session {
|
|
|
71
52
|
}
|
|
72
53
|
// ─── Observe — always first, before any action ────────────────────────────────
|
|
73
54
|
async observe() {
|
|
74
|
-
this.lastInteractiveElements.clear();
|
|
75
55
|
return this.observer.observe();
|
|
76
56
|
}
|
|
77
|
-
// ───
|
|
57
|
+
// ─── Form and autocomplete detection ────────────────────────────────────────
|
|
58
|
+
async detectForms() {
|
|
59
|
+
return this.observer.detectForms();
|
|
60
|
+
}
|
|
61
|
+
async detectAutocomplete() {
|
|
62
|
+
return this.observer.detectAutocomplete();
|
|
63
|
+
}
|
|
64
|
+
async detectErrors() {
|
|
65
|
+
return this.observer.detectErrors();
|
|
66
|
+
}
|
|
67
|
+
// ─── Screenshot ────────────────────────────────────────────────────────────────
|
|
78
68
|
async screenshot() {
|
|
79
|
-
const
|
|
69
|
+
const result = await this.page.screenshot({ type: "jpeg", quality: 80, encoding: "base64" });
|
|
80
70
|
return {
|
|
81
|
-
base64:
|
|
71
|
+
base64: typeof result === "string" ? result : result.toString("base64"),
|
|
82
72
|
mimeType: "image/jpeg",
|
|
83
73
|
timestamp: Date.now(),
|
|
84
74
|
};
|
|
85
75
|
}
|
|
86
76
|
// ─── DOM text extraction — fallback when no useful XHR is captured ────────────
|
|
87
|
-
// Strips chrome (nav, footer, scripts) and returns visible text capped at 15k chars.
|
|
88
|
-
// Used by web-agent when the page renders data server-side with no API calls.
|
|
89
77
|
async extractPageContent() {
|
|
90
78
|
return await this.page.evaluate(() => {
|
|
91
79
|
const clone = document.body.cloneNode(true);
|
|
@@ -99,344 +87,99 @@ class Session {
|
|
|
99
87
|
.slice(0, 15000);
|
|
100
88
|
});
|
|
101
89
|
}
|
|
102
|
-
// ───
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
const clone = document.body.cloneNode(true);
|
|
108
|
-
clone.querySelectorAll("script,style,nav,footer,header")
|
|
109
|
-
.forEach((el) => el.remove());
|
|
110
|
-
const content = clone.innerText
|
|
111
|
-
.split("\n")
|
|
112
|
-
.map((l) => l.trim())
|
|
113
|
-
.filter((l) => l.length > 0)
|
|
114
|
-
.join("\n")
|
|
115
|
-
.slice(0, 12000);
|
|
116
|
-
const links = [];
|
|
117
|
-
const seen = new Set();
|
|
118
|
-
document.querySelectorAll("a[href]").forEach((a) => {
|
|
119
|
-
const href = a.href;
|
|
120
|
-
const text = a.innerText.trim().slice(0, 100);
|
|
121
|
-
if (href && text && !seen.has(href) && href.startsWith("http")) {
|
|
122
|
-
seen.add(href);
|
|
123
|
-
links.push({ href, text });
|
|
124
|
-
}
|
|
125
|
-
});
|
|
126
|
-
return { content, links: links.slice(0, 80) };
|
|
127
|
-
});
|
|
128
|
-
}
|
|
129
|
-
// ─── Interactive DOM observation — full interactive element scan ─────────────
|
|
130
|
-
// Replaces extractPageWithLinks() for agent navigation: returns ALL interactive
|
|
131
|
-
// elements (buttons, menus, dropdowns, inputs) not just anchor links.
|
|
132
|
-
// Sets data-rid attributes on matched elements so clickById() can target them.
|
|
133
|
-
async observeInteractiveDom() {
|
|
134
|
-
const INTERACTIVE_SEL = [
|
|
135
|
-
"button", "[role='button']", "[role='switch']",
|
|
136
|
-
"input:not([type='hidden'])",
|
|
137
|
-
"a[href]", "select", "textarea",
|
|
138
|
-
"[role='menuitem']", "[role='option']", "[role='tab']", "[role='combobox']",
|
|
139
|
-
"[onclick]", "[tabindex]:not([tabindex='-1'])",
|
|
140
|
-
].join(", ");
|
|
141
|
-
const result = await this.page.evaluate((interactiveSel) => {
|
|
142
|
-
// Clean previous markers
|
|
143
|
-
document.querySelectorAll("[data-rid]").forEach((el) => el.removeAttribute("data-rid"));
|
|
144
|
-
const isVisible = (el) => {
|
|
145
|
-
const htmlEl = el;
|
|
146
|
-
// offsetParent is null when the element or any ancestor has display:none.
|
|
147
|
-
// Exception: <body>, <html>, and position:fixed/sticky elements legitimately have offsetParent === null.
|
|
148
|
-
if (htmlEl.offsetParent === null) {
|
|
149
|
-
const style = window.getComputedStyle(el);
|
|
150
|
-
const pos = style.position;
|
|
151
|
-
if (pos !== "fixed" && pos !== "sticky" && el.tagName !== "BODY" && el.tagName !== "HTML") {
|
|
152
|
-
return false;
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
const style = window.getComputedStyle(el);
|
|
156
|
-
if (style.visibility === "hidden" || style.opacity === "0")
|
|
157
|
-
return false;
|
|
158
|
-
const rect = el.getBoundingClientRect();
|
|
159
|
-
if (rect.width === 0 || rect.height === 0)
|
|
160
|
-
return false;
|
|
161
|
-
// Check if element is within viewport bounds
|
|
162
|
-
const vh = window.innerHeight;
|
|
163
|
-
const vw = window.innerWidth;
|
|
164
|
-
return rect.bottom > 0 && rect.top < vh && rect.right > 0 && rect.left < vw;
|
|
165
|
-
};
|
|
166
|
-
const truncate = (v, max = 60) => v.length > max ? v.slice(0, max) : v;
|
|
167
|
-
const resolveUrl = (href) => {
|
|
168
|
-
if (!href)
|
|
169
|
-
return href;
|
|
170
|
-
try {
|
|
171
|
-
return new URL(href, window.location.href).href;
|
|
172
|
-
}
|
|
173
|
-
catch {
|
|
174
|
-
return href;
|
|
175
|
-
}
|
|
176
|
-
};
|
|
177
|
-
const ATTR_KEYS = [
|
|
178
|
-
"title", "type", "name", "role", "aria-label",
|
|
179
|
-
"placeholder", "value", "alt", "href", "aria-expanded",
|
|
180
|
-
];
|
|
181
|
-
const ATTR_RENDER_ORDER = [
|
|
182
|
-
"title", "type", "name", "role", "aria_label",
|
|
183
|
-
"placeholder", "value", "alt", "href", "aria_expanded",
|
|
184
|
-
];
|
|
185
|
-
const getAttrs = (el) => {
|
|
186
|
-
const out = {};
|
|
187
|
-
for (const attr of ATTR_KEYS) {
|
|
188
|
-
let val = el.getAttribute(attr);
|
|
189
|
-
if (val == null || val === "")
|
|
190
|
-
continue;
|
|
191
|
-
if (attr === "href")
|
|
192
|
-
val = resolveUrl(val);
|
|
193
|
-
const key = attr.replace(/-/g, "_");
|
|
194
|
-
out[key] = truncate(val);
|
|
195
|
-
}
|
|
196
|
-
if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement)
|
|
197
|
-
&& el.value) {
|
|
198
|
-
out["value"] = truncate(el.value);
|
|
199
|
-
}
|
|
200
|
-
return out;
|
|
201
|
-
};
|
|
202
|
-
const getInnerText = (el) => (el.textContent ?? "").replace(/\s+/g, " ").trim().slice(0, 100);
|
|
203
|
-
// B — buttons, checkboxes, radios, switches, role="button"
|
|
204
|
-
// L — anchor links
|
|
205
|
-
// I — inputs, textareas, selects
|
|
206
|
-
// M — menuitem, option, tab, combobox, other interactables
|
|
207
|
-
const categorize = (el) => {
|
|
208
|
-
const tag = el.tagName.toLowerCase();
|
|
209
|
-
const role = el.getAttribute("role") ?? "";
|
|
210
|
-
const type = el.type ?? "";
|
|
211
|
-
if (tag === "a" && el.hasAttribute("href"))
|
|
212
|
-
return "L";
|
|
213
|
-
if (tag === "textarea" || tag === "select")
|
|
214
|
-
return "I";
|
|
215
|
-
if (tag === "input" && !["submit", "button", "checkbox", "radio", "hidden"].includes(type))
|
|
216
|
-
return "I";
|
|
217
|
-
if (tag === "button")
|
|
218
|
-
return "B";
|
|
219
|
-
if (tag === "input" && ["submit", "button", "checkbox", "radio"].includes(type))
|
|
220
|
-
return "B";
|
|
221
|
-
if (role === "button" || role === "switch")
|
|
222
|
-
return "B";
|
|
223
|
-
if (["menuitem", "option", "tab", "combobox"].includes(role))
|
|
224
|
-
return "M";
|
|
225
|
-
return "M";
|
|
226
|
-
};
|
|
227
|
-
const interactiveSet = new Set();
|
|
228
|
-
document.querySelectorAll(interactiveSel).forEach((el) => interactiveSet.add(el));
|
|
229
|
-
// Only include text near interactive elements — headings, labels, or
|
|
230
|
-
// nodes whose parent/grandparent contains an interactive child.
|
|
231
|
-
const isNearInteractive = (parent) => {
|
|
232
|
-
if (!parent)
|
|
233
|
-
return false;
|
|
234
|
-
const tag = parent.tagName.toLowerCase();
|
|
235
|
-
if (["h1", "h2", "h3", "h4", "h5", "h6", "label", "legend", "figcaption", "caption", "th", "li"].includes(tag))
|
|
236
|
-
return true;
|
|
237
|
-
try {
|
|
238
|
-
if (parent.querySelector(interactiveSel))
|
|
239
|
-
return true;
|
|
240
|
-
if (parent.parentElement?.querySelector(interactiveSel))
|
|
241
|
-
return true;
|
|
242
|
-
}
|
|
243
|
-
catch { /* selector might fail on exotic DOMs */ }
|
|
244
|
-
return false;
|
|
245
|
-
};
|
|
246
|
-
const SELF_CLOSING = new Set(["input", "img", "br", "hr"]);
|
|
247
|
-
const lines = [];
|
|
248
|
-
const elements = [];
|
|
249
|
-
const counters = { B: 0, L: 0, I: 0, M: 0 };
|
|
250
|
-
const walk = (node) => {
|
|
251
|
-
if (node.nodeType === Node.TEXT_NODE) {
|
|
252
|
-
const text = (node.textContent ?? "").replace(/\s+/g, " ").trim();
|
|
253
|
-
if (text.length < 2)
|
|
254
|
-
return;
|
|
255
|
-
const parent = node.parentElement;
|
|
256
|
-
if (!parent)
|
|
257
|
-
return;
|
|
258
|
-
if (parent.tagName === "SCRIPT" || parent.tagName === "STYLE" || parent.tagName === "NOSCRIPT")
|
|
259
|
-
return;
|
|
260
|
-
if (interactiveSet.has(parent))
|
|
261
|
-
return; // text goes inside the tag
|
|
262
|
-
if (!isVisible(parent))
|
|
263
|
-
return;
|
|
264
|
-
if (!isNearInteractive(parent))
|
|
265
|
-
return;
|
|
266
|
-
lines.push(`_[:] ${text.slice(0, 150)}`);
|
|
267
|
-
return;
|
|
268
|
-
}
|
|
269
|
-
if (node.nodeType !== Node.ELEMENT_NODE)
|
|
270
|
-
return;
|
|
271
|
-
const el = node;
|
|
272
|
-
const tag = el.tagName.toLowerCase();
|
|
273
|
-
if (tag === "script" || tag === "style" || tag === "noscript")
|
|
274
|
-
return;
|
|
275
|
-
if (interactiveSet.has(el)) {
|
|
276
|
-
if (!isVisible(el))
|
|
277
|
-
return;
|
|
278
|
-
const cat = categorize(el);
|
|
279
|
-
counters[cat]++;
|
|
280
|
-
const id = `${cat}${counters[cat]}`;
|
|
281
|
-
const attrs = getAttrs(el);
|
|
282
|
-
const text = getInnerText(el);
|
|
283
|
-
const htmlTag = tag === "a" ? "a" : tag;
|
|
284
|
-
// Mark element for Playwright selector lookup
|
|
285
|
-
el.setAttribute("data-rid", id);
|
|
286
|
-
const attrParts = [];
|
|
287
|
-
for (const key of ATTR_RENDER_ORDER) {
|
|
288
|
-
if (attrs[key])
|
|
289
|
-
attrParts.push(`${key}="${attrs[key]}"`);
|
|
290
|
-
}
|
|
291
|
-
const attrStr = attrParts.length > 0 ? " " + attrParts.join(" ") : "";
|
|
292
|
-
if (SELF_CLOSING.has(htmlTag)) {
|
|
293
|
-
lines.push(`${id}[:] <${htmlTag}${attrStr}/>`);
|
|
294
|
-
}
|
|
295
|
-
else {
|
|
296
|
-
lines.push(`${id}[:] <${htmlTag}${attrStr}>${text}</${htmlTag}>`);
|
|
297
|
-
}
|
|
298
|
-
elements.push({ id, selector: `[data-rid="${id}"]` });
|
|
299
|
-
return; // don't recurse into interactive elements
|
|
300
|
-
}
|
|
301
|
-
for (const child of Array.from(el.childNodes)) {
|
|
302
|
-
walk(child);
|
|
303
|
-
}
|
|
304
|
-
};
|
|
305
|
-
walk(document.body);
|
|
306
|
-
return { lines, elements };
|
|
307
|
-
}, INTERACTIVE_SEL);
|
|
308
|
-
this.lastInteractiveElements = new Map(result.elements.map((e) => [e.id, e.selector]));
|
|
309
|
-
return {
|
|
310
|
-
dom: result.lines.join("\n"),
|
|
311
|
-
elements: result.elements,
|
|
312
|
-
};
|
|
313
|
-
}
|
|
314
|
-
// ─── Intelligent DOM scrape — Notte-style content understanding ──────────────
|
|
315
|
-
// Passes visible page text to Claude Sonnet and asks whether the goal data is present.
|
|
316
|
-
// If not found, follows nextUrl suggestions up to maxHops times before giving up.
|
|
317
|
-
// Caller must supply initialContent (from extractPageContent()) to avoid a double call.
|
|
318
|
-
async scrapeWithGoal(goal, initialContent, seedValues, maxHops = 3, initialLinks) {
|
|
319
|
-
let content = initialContent;
|
|
320
|
-
let links = initialLinks;
|
|
321
|
-
for (let hop = 0; hop <= maxHops; hop++) {
|
|
322
|
-
const result = await this.ai.scrapePageContent(goal, content, seedValues, links);
|
|
323
|
-
if (result.found || !result.nextUrl || hop === maxHops) {
|
|
324
|
-
return result;
|
|
325
|
-
}
|
|
326
|
-
// Follow the suggested link and re-scrape
|
|
90
|
+
// ─── Interaction ──────────────────────────────────────────────────────────────
|
|
91
|
+
async clickById(elementId) {
|
|
92
|
+
// First try cached CSS selector (reliable)
|
|
93
|
+
const selector = this.observer.getSelector(elementId);
|
|
94
|
+
if (selector) {
|
|
327
95
|
try {
|
|
328
|
-
await this.
|
|
329
|
-
|
|
330
|
-
content = page.content;
|
|
331
|
-
links = page.links;
|
|
96
|
+
await this.page.click(selector);
|
|
97
|
+
return;
|
|
332
98
|
}
|
|
333
99
|
catch {
|
|
334
|
-
|
|
100
|
+
// Fall through to coordinate-based click
|
|
335
101
|
}
|
|
336
102
|
}
|
|
337
|
-
|
|
103
|
+
const coords = await this.observer.resolveElementId(elementId);
|
|
104
|
+
if (!coords)
|
|
105
|
+
throw new Error(`Element ${elementId} not found in DOM`);
|
|
106
|
+
await this.page.mouse.click(coords.x, coords.y);
|
|
338
107
|
}
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
async clickById(elementId) {
|
|
343
|
-
const pagesBefore = this.page.context().pages().length;
|
|
344
|
-
const selector = this.lastInteractiveElements.get(elementId);
|
|
108
|
+
async fillById(elementId, value) {
|
|
109
|
+
// First try cached CSS selector (reliable)
|
|
110
|
+
const selector = this.observer.getSelector(elementId);
|
|
345
111
|
if (selector) {
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
112
|
+
try {
|
|
113
|
+
await this.page.click(selector, { clickCount: 3 }); // select all
|
|
114
|
+
await this.page.keyboard.press("Backspace");
|
|
115
|
+
await this.page.type(selector, value, { delay: 80 });
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
catch {
|
|
119
|
+
// Fall through to coordinate-based fill
|
|
120
|
+
}
|
|
353
121
|
}
|
|
354
|
-
await this.switchToNewTabIfOpened(pagesBefore);
|
|
355
|
-
}
|
|
356
|
-
// Fill input by element ID from observe()
|
|
357
|
-
async fillById(elementId, value) {
|
|
358
122
|
const coords = await this.observer.resolveElementId(elementId);
|
|
359
123
|
if (!coords)
|
|
360
124
|
throw new Error(`Element ${elementId} not found in DOM`);
|
|
361
125
|
await this.page.mouse.click(coords.x, coords.y);
|
|
362
126
|
await this.page.keyboard.press("Control+a");
|
|
363
|
-
await this.page.keyboard.type(value);
|
|
127
|
+
await this.page.keyboard.type(value, { delay: 80 });
|
|
128
|
+
}
|
|
129
|
+
// ─── Direct selector interaction (for autocomplete clicks) ──────────────────
|
|
130
|
+
async clickSelector(selector) {
|
|
131
|
+
const el = await this.page.waitForSelector(selector, { timeout: 3_000, visible: true });
|
|
132
|
+
if (el) {
|
|
133
|
+
await el.click();
|
|
134
|
+
}
|
|
135
|
+
else {
|
|
136
|
+
throw new Error(`Selector ${selector} not found`);
|
|
137
|
+
}
|
|
364
138
|
}
|
|
365
139
|
// ─── Captured requests ────────────────────────────────────────────────────────
|
|
366
|
-
// All captured requests sorted by relevance score
|
|
367
140
|
getCapturedRequests() {
|
|
368
141
|
return this.interceptor.getAll();
|
|
369
142
|
}
|
|
370
|
-
// Only useful XHR/fetch requests above score threshold
|
|
371
143
|
getUsefulRequests(minScore = 1) {
|
|
372
144
|
return this.interceptor.getUseful(minScore);
|
|
373
145
|
}
|
|
374
|
-
// Only requests that fired as a direct result of a user action
|
|
375
146
|
getActionTriggeredRequests() {
|
|
376
147
|
return this.interceptor.getActionTriggered();
|
|
377
148
|
}
|
|
378
|
-
// Listen to requests in real-time as they come in
|
|
379
149
|
onRequest(handler) {
|
|
380
150
|
this.interceptor.on("request", handler);
|
|
381
151
|
}
|
|
152
|
+
hasSeedValueInResponses(seedValues) {
|
|
153
|
+
return this.interceptor.hasSeedValueInResponses(seedValues);
|
|
154
|
+
}
|
|
155
|
+
markActionTimestamp() {
|
|
156
|
+
this.interceptor.markActionTimestamp();
|
|
157
|
+
}
|
|
382
158
|
// ─── act() — achieve a goal through multi-step navigation ────────────────────
|
|
383
159
|
async act(goal, maxSteps = 10, seedValues) {
|
|
384
160
|
const steps = [];
|
|
385
|
-
const
|
|
386
|
-
let consecutiveFailures = 0;
|
|
387
|
-
const MAX_CONSECUTIVE_FAILURES = 3;
|
|
161
|
+
const history = [];
|
|
388
162
|
for (let i = 0; i < maxSteps; i++) {
|
|
389
|
-
// ALWAYS observe first — no screenshot by default
|
|
390
163
|
const observation = await this.observe();
|
|
391
|
-
let
|
|
164
|
+
let action;
|
|
392
165
|
let usedScreenshot = false;
|
|
393
|
-
// If observe() gives us enough elements, use text-based decision
|
|
394
166
|
if (observation.elements.filter((e) => e.visible).length > 0) {
|
|
395
|
-
|
|
167
|
+
action = await this.ai.decideFromObservation(goal, observation, history, seedValues);
|
|
396
168
|
}
|
|
397
169
|
else {
|
|
398
|
-
// Fallback: page has no readable elements (canvas, iframe, etc.) → screenshot
|
|
399
170
|
const shot = await this.screenshot();
|
|
400
|
-
|
|
171
|
+
action = await this.ai.decideFromScreenshot(goal, shot.base64, history, seedValues);
|
|
401
172
|
usedScreenshot = true;
|
|
402
173
|
}
|
|
403
|
-
const { action, state } = completion;
|
|
404
174
|
steps.push({ action, observeBefore: observation, usedScreenshot });
|
|
405
|
-
|
|
406
|
-
trajectory.push({
|
|
407
|
-
role: "assistant",
|
|
408
|
-
content: JSON.stringify(completion),
|
|
409
|
-
});
|
|
410
|
-
// Track consecutive failures
|
|
411
|
-
if (state.previous_goal_status === "failure") {
|
|
412
|
-
consecutiveFailures++;
|
|
413
|
-
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
414
|
-
return {
|
|
415
|
-
success: false,
|
|
416
|
-
steps,
|
|
417
|
-
finalUrl: this.url,
|
|
418
|
-
reason: `${MAX_CONSECUTIVE_FAILURES} consecutive failures: ${state.previous_goal_eval}`,
|
|
419
|
-
};
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
else if (state.previous_goal_status === "success") {
|
|
423
|
-
consecutiveFailures = 0;
|
|
424
|
-
}
|
|
425
|
-
// Terminal states
|
|
175
|
+
history.push(`Step ${i + 1}: ${action.type} — ${action.reason}`);
|
|
426
176
|
if (action.type === "done") {
|
|
427
177
|
return { success: true, steps, finalUrl: this.url, reason: action.reason };
|
|
428
178
|
}
|
|
429
179
|
if (action.type === "impossible") {
|
|
430
180
|
return { success: false, steps, finalUrl: this.url, reason: action.reason };
|
|
431
181
|
}
|
|
432
|
-
|
|
433
|
-
const actionSuccess = await this.executeAction(action);
|
|
434
|
-
// Add execution result to trajectory
|
|
435
|
-
const resultMsg = actionSuccess
|
|
436
|
-
? `Action '${action.type}' succeeded`
|
|
437
|
-
: `Action '${action.type}' failed`;
|
|
438
|
-
trajectory.push({ role: "user", content: resultMsg });
|
|
439
|
-
// Short wait after interactions for page to settle
|
|
182
|
+
await this.executeAction(action);
|
|
440
183
|
await this.page.waitForTimeout(500);
|
|
441
184
|
}
|
|
442
185
|
return {
|
|
@@ -447,10 +190,7 @@ class Session {
|
|
|
447
190
|
};
|
|
448
191
|
}
|
|
449
192
|
// ─── findData() — navigate + capture requests until goal achieved ─────────────
|
|
450
|
-
// seedValues: concrete example inputs to use during discovery (e.g. what to type in a search box)
|
|
451
193
|
async findData(goal, maxSteps = 15, seedValues) {
|
|
452
|
-
// Do NOT clear — requests captured during goto() (t=0) are valuable.
|
|
453
|
-
// goto() already waits for networkidle + 800ms minimum, so no additional delay needed here.
|
|
454
194
|
const actResult = await this.act(goal, maxSteps, seedValues);
|
|
455
195
|
const capturedRequests = this.interceptor.getUseful();
|
|
456
196
|
return {
|
|
@@ -461,84 +201,60 @@ class Session {
|
|
|
461
201
|
// ─── Cleanup ──────────────────────────────────────────────────────────────────
|
|
462
202
|
async close() {
|
|
463
203
|
this.interceptor.stop();
|
|
464
|
-
|
|
204
|
+
if (!this.page.isClosed()) {
|
|
205
|
+
await this.page.close();
|
|
206
|
+
}
|
|
465
207
|
}
|
|
466
208
|
async closeAll() {
|
|
467
209
|
this.interceptor.stop();
|
|
468
|
-
await this.
|
|
469
|
-
}
|
|
470
|
-
// ─── New tab detection ─────────────────────────────────────────────────────────
|
|
471
|
-
// After a click, check if a new tab was opened and switch the session to it.
|
|
472
|
-
// Rebuilds interceptor/observer so they reference the new page.
|
|
473
|
-
async switchToNewTabIfOpened(pagesBeforeCount) {
|
|
474
|
-
await this.page.waitForTimeout(1000);
|
|
475
|
-
const pages = this.page.context().pages();
|
|
476
|
-
if (pages.length <= pagesBeforeCount)
|
|
477
|
-
return;
|
|
478
|
-
// Switch to the newest tab
|
|
479
|
-
const newPage = pages[pages.length - 1];
|
|
480
|
-
if (newPage === this.page)
|
|
481
|
-
return;
|
|
482
|
-
// Tear down interceptor on old page, rebuild on new page
|
|
483
|
-
this.interceptor.stop();
|
|
484
|
-
this.page = newPage;
|
|
485
|
-
this.interceptor = new interceptor_1.Interceptor(this.page);
|
|
486
|
-
this.observer = new observer_1.Observer(this.page);
|
|
487
|
-
this.interceptor.start();
|
|
488
|
-
try {
|
|
489
|
-
await this.page.waitForLoadState("domcontentloaded", { timeout: this.defaultTimeout });
|
|
490
|
-
}
|
|
491
|
-
catch {
|
|
492
|
-
// Partial load — continue with what we have
|
|
493
|
-
}
|
|
210
|
+
await this.browserHandle.close();
|
|
494
211
|
}
|
|
495
212
|
// ─── Execute action ───────────────────────────────────────────────────────────
|
|
496
213
|
async executeAction(action) {
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
return false;
|
|
214
|
+
switch (action.type) {
|
|
215
|
+
case "click":
|
|
216
|
+
this.interceptor.markActionTimestamp();
|
|
217
|
+
await this.clickById(action.elementId).catch(() => {
|
|
218
|
+
// If all resolution fails, log and continue
|
|
219
|
+
console.warn(`[Session] Click failed for ${action.elementId}`);
|
|
220
|
+
});
|
|
221
|
+
break;
|
|
222
|
+
case "fill":
|
|
223
|
+
this.interceptor.markActionTimestamp();
|
|
224
|
+
await this.fillById(action.elementId, action.value).catch(() => {
|
|
225
|
+
console.warn(`[Session] Fill failed for ${action.elementId}`);
|
|
226
|
+
});
|
|
227
|
+
break;
|
|
228
|
+
case "scroll":
|
|
229
|
+
this.interceptor.markActionTimestamp();
|
|
230
|
+
await this.page.evaluate(({ direction, amount }) => {
|
|
231
|
+
window.scrollBy(0, direction === "down" ? amount : -amount);
|
|
232
|
+
}, { direction: action.direction, amount: action.amount ?? 300 }).catch(() => {
|
|
233
|
+
// Execution context destroyed = page navigated mid-scroll
|
|
234
|
+
});
|
|
235
|
+
break;
|
|
236
|
+
case "navigate":
|
|
237
|
+
this.interceptor.markActionTimestamp();
|
|
238
|
+
try {
|
|
239
|
+
await this.page.goto(action.url, { waitUntil: "domcontentloaded", timeout: this.defaultTimeout });
|
|
240
|
+
// Wait for network idle like Session.goto() does
|
|
241
|
+
try {
|
|
242
|
+
await this.page.waitForNetworkIdle({ timeout: 8_000 });
|
|
243
|
+
}
|
|
244
|
+
catch { /* background polling — continue */ }
|
|
245
|
+
await this.page.waitForTimeout(800);
|
|
246
|
+
}
|
|
247
|
+
catch (err) {
|
|
248
|
+
console.warn(`[Session] Navigate failed for ${action.url}:`, err.message);
|
|
249
|
+
}
|
|
250
|
+
break;
|
|
251
|
+
case "wait":
|
|
252
|
+
await this.page.waitForTimeout(action.ms);
|
|
253
|
+
break;
|
|
254
|
+
case "screenshot":
|
|
255
|
+
break;
|
|
256
|
+
default:
|
|
257
|
+
break;
|
|
542
258
|
}
|
|
543
259
|
}
|
|
544
260
|
}
|