zan-browser 1.3.37 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/ai.d.ts +3 -16
  2. package/dist/ai.d.ts.map +1 -1
  3. package/dist/ai.js +101 -208
  4. package/dist/ai.js.map +1 -1
  5. package/dist/blacklist.d.ts +2 -0
  6. package/dist/blacklist.d.ts.map +1 -0
  7. package/dist/blacklist.js +115 -0
  8. package/dist/blacklist.js.map +1 -0
  9. package/dist/browser-provider.d.ts +81 -0
  10. package/dist/browser-provider.d.ts.map +1 -0
  11. package/dist/browser-provider.js +6 -0
  12. package/dist/browser-provider.js.map +1 -0
  13. package/dist/browser.d.ts +1 -1
  14. package/dist/browser.d.ts.map +1 -1
  15. package/dist/browser.js +51 -60
  16. package/dist/browser.js.map +1 -1
  17. package/dist/index.d.ts +7 -1
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +5 -1
  20. package/dist/index.js.map +1 -1
  21. package/dist/interceptor.d.ts +3 -3
  22. package/dist/interceptor.d.ts.map +1 -1
  23. package/dist/interceptor.js +45 -19
  24. package/dist/interceptor.js.map +1 -1
  25. package/dist/navigator.d.ts +40 -0
  26. package/dist/navigator.d.ts.map +1 -0
  27. package/dist/navigator.js +507 -0
  28. package/dist/navigator.js.map +1 -0
  29. package/dist/observer.d.ts +23 -3
  30. package/dist/observer.d.ts.map +1 -1
  31. package/dist/observer.js +310 -270
  32. package/dist/observer.js.map +1 -1
  33. package/dist/perception.d.ts +42 -0
  34. package/dist/perception.d.ts.map +1 -0
  35. package/dist/perception.js +140 -0
  36. package/dist/perception.js.map +1 -0
  37. package/dist/providers/browserbase.d.ts +12 -0
  38. package/dist/providers/browserbase.d.ts.map +1 -0
  39. package/dist/providers/browserbase.js +226 -0
  40. package/dist/providers/browserbase.js.map +1 -0
  41. package/dist/providers/puppeteer-local.d.ts +5 -0
  42. package/dist/providers/puppeteer-local.d.ts.map +1 -0
  43. package/dist/providers/puppeteer-local.js +218 -0
  44. package/dist/providers/puppeteer-local.js.map +1 -0
  45. package/dist/session.d.ts +17 -23
  46. package/dist/session.d.ts.map +1 -1
  47. package/dist/session.js +112 -401
  48. package/dist/session.js.map +1 -1
  49. package/dist/types.d.ts +1 -32
  50. package/dist/types.d.ts.map +1 -1
  51. package/package.json +3 -2
package/dist/session.js CHANGED
@@ -5,56 +5,43 @@ const interceptor_1 = require("./interceptor");
5
5
  const observer_1 = require("./observer");
6
6
  class Session {
7
7
  page;
8
- browser;
8
+ browserHandle;
9
9
  interceptor;
10
10
  observer;
11
11
  ai;
12
12
  defaultTimeout;
13
- lastInteractiveElements = new Map();
14
- viewerUrl; // Browserbase live viewer URL
15
- constructor(page, browser, ai, timeout = 30_000, viewerUrl) {
13
+ viewerUrl;
14
+ constructor(page, browserHandle, ai, timeout = 30_000, viewerUrl) {
16
15
  this.page = page;
17
- this.browser = browser;
16
+ this.browserHandle = browserHandle;
18
17
  this.ai = ai;
19
18
  this.defaultTimeout = timeout;
20
19
  this.viewerUrl = viewerUrl;
21
20
  this.interceptor = new interceptor_1.Interceptor(page);
22
21
  this.observer = new observer_1.Observer(page);
23
22
  // Start capturing traffic immediately — t=0
24
- // This catches requests the site fires on load before we do anything
25
23
  this.interceptor.start();
26
24
  }
25
+ // ─── Accessors for SmartNavigator ───────────────────────────────────────────
26
+ getPage() { return this.page; }
27
+ getInterceptor() { return this.interceptor; }
28
+ getObserver() { return this.observer; }
29
+ getAI() { return this.ai; }
27
30
  // ─── Navigation ───────────────────────────────────────────────────────────────
28
31
  async goto(url, options) {
29
32
  const timeout = options?.timeout ?? this.defaultTimeout;
30
- try {
31
- await this.page.goto(url, {
32
- waitUntil: "domcontentloaded",
33
- timeout,
34
- });
35
- }
36
- catch (err) {
37
- // On timeout, don't destroy the session — wait for network idle on the
38
- // partially loaded page, then continue if the URL actually changed.
39
- try {
40
- await this.page.waitForLoadState("networkidle", { timeout: 5_000 });
41
- }
42
- catch {
43
- // networkidle also timed out — that's fine, continue with what we have
44
- }
45
- const currentUrl = this.page.url();
46
- if (currentUrl === "about:blank" || currentUrl === "") {
47
- throw new Error(`Navigation to ${url} failed: page is still on about:blank — ${err.message}`);
48
- }
49
- // Page URL changed — partial load is usable, continue
50
- }
33
+ await this.page.goto(url, {
34
+ waitUntil: "domcontentloaded",
35
+ timeout,
36
+ });
51
37
  // Wait for network to go idle — catches XHR that SPAs fire after DOM is parsed.
52
38
  try {
53
- await this.page.waitForLoadState("networkidle", { timeout: 8_000 });
39
+ await this.page.waitForNetworkIdle({ timeout: 8_000 });
54
40
  }
55
41
  catch {
56
42
  // networkidle timed out — page likely has background polling, continue anyway
57
43
  }
44
+ // Minimum 800ms after networkidle (or timeout) — some SPAs fire XHR slightly after idle
58
45
  await this.page.waitForTimeout(800);
59
46
  }
60
47
  async wait(ms) {
@@ -65,21 +52,28 @@ class Session {
65
52
  }
66
53
  // ─── Observe — always first, before any action ────────────────────────────────
67
54
  async observe() {
68
- this.lastInteractiveElements.clear();
69
55
  return this.observer.observe();
70
56
  }
71
- // ─── Screenshot only when observe() is not enough ──────────────────────────
57
+ // ─── Form and autocomplete detection ────────────────────────────────────────
58
+ async detectForms() {
59
+ return this.observer.detectForms();
60
+ }
61
+ async detectAutocomplete() {
62
+ return this.observer.detectAutocomplete();
63
+ }
64
+ async detectErrors() {
65
+ return this.observer.detectErrors();
66
+ }
67
+ // ─── Screenshot ────────────────────────────────────────────────────────────────
72
68
  async screenshot() {
73
- const buffer = await this.page.screenshot({ type: "jpeg", quality: 80 });
69
+ const result = await this.page.screenshot({ type: "jpeg", quality: 80, encoding: "base64" });
74
70
  return {
75
- base64: buffer.toString("base64"),
71
+ base64: typeof result === "string" ? result : result.toString("base64"),
76
72
  mimeType: "image/jpeg",
77
73
  timestamp: Date.now(),
78
74
  };
79
75
  }
80
76
  // ─── DOM text extraction — fallback when no useful XHR is captured ────────────
81
- // Strips chrome (nav, footer, scripts) and returns visible text capped at 15k chars.
82
- // Used by web-agent when the page renders data server-side with no API calls.
83
77
  async extractPageContent() {
84
78
  return await this.page.evaluate(() => {
85
79
  const clone = document.body.cloneNode(true);
@@ -93,344 +87,99 @@ class Session {
93
87
  .slice(0, 15000);
94
88
  });
95
89
  }
96
- // ─── DOM text + links extraction — for link-aware intelligent navigation ─────
97
- // Returns visible text (same as extractPageContent) plus all <a> links on the page.
98
- // Used by the caller to feed real links into AI so it picks from actual URLs, not guesses.
99
- async extractPageWithLinks() {
100
- return await this.page.evaluate(() => {
101
- const clone = document.body.cloneNode(true);
102
- clone.querySelectorAll("script,style,nav,footer,header")
103
- .forEach((el) => el.remove());
104
- const content = clone.innerText
105
- .split("\n")
106
- .map((l) => l.trim())
107
- .filter((l) => l.length > 0)
108
- .join("\n")
109
- .slice(0, 12000);
110
- const links = [];
111
- const seen = new Set();
112
- document.querySelectorAll("a[href]").forEach((a) => {
113
- const href = a.href;
114
- const text = a.innerText.trim().slice(0, 100);
115
- if (href && text && !seen.has(href) && href.startsWith("http")) {
116
- seen.add(href);
117
- links.push({ href, text });
118
- }
119
- });
120
- return { content, links: links.slice(0, 80) };
121
- });
122
- }
123
- // ─── Interactive DOM observation — full interactive element scan ─────────────
124
- // Replaces extractPageWithLinks() for agent navigation: returns ALL interactive
125
- // elements (buttons, menus, dropdowns, inputs) not just anchor links.
126
- // Sets data-rid attributes on matched elements so clickById() can target them.
127
- async observeInteractiveDom() {
128
- const INTERACTIVE_SEL = [
129
- "button", "[role='button']", "[role='switch']",
130
- "input:not([type='hidden'])",
131
- "a[href]", "select", "textarea",
132
- "[role='menuitem']", "[role='option']", "[role='tab']", "[role='combobox']",
133
- "[onclick]", "[tabindex]:not([tabindex='-1'])",
134
- ].join(", ");
135
- const result = await this.page.evaluate((interactiveSel) => {
136
- // Clean previous markers
137
- document.querySelectorAll("[data-rid]").forEach((el) => el.removeAttribute("data-rid"));
138
- const isVisible = (el) => {
139
- const htmlEl = el;
140
- // offsetParent is null when the element or any ancestor has display:none.
141
- // Exception: <body>, <html>, and position:fixed/sticky elements legitimately have offsetParent === null.
142
- if (htmlEl.offsetParent === null) {
143
- const style = window.getComputedStyle(el);
144
- const pos = style.position;
145
- if (pos !== "fixed" && pos !== "sticky" && el.tagName !== "BODY" && el.tagName !== "HTML") {
146
- return false;
147
- }
148
- }
149
- const style = window.getComputedStyle(el);
150
- if (style.visibility === "hidden" || style.opacity === "0")
151
- return false;
152
- const rect = el.getBoundingClientRect();
153
- if (rect.width === 0 || rect.height === 0)
154
- return false;
155
- // Check if element is within viewport bounds
156
- const vh = window.innerHeight;
157
- const vw = window.innerWidth;
158
- return rect.bottom > 0 && rect.top < vh && rect.right > 0 && rect.left < vw;
159
- };
160
- const truncate = (v, max = 60) => v.length > max ? v.slice(0, max) : v;
161
- const resolveUrl = (href) => {
162
- if (!href)
163
- return href;
164
- try {
165
- return new URL(href, window.location.href).href;
166
- }
167
- catch {
168
- return href;
169
- }
170
- };
171
- const ATTR_KEYS = [
172
- "title", "type", "name", "role", "aria-label",
173
- "placeholder", "value", "alt", "href", "aria-expanded",
174
- ];
175
- const ATTR_RENDER_ORDER = [
176
- "title", "type", "name", "role", "aria_label",
177
- "placeholder", "value", "alt", "href", "aria_expanded",
178
- ];
179
- const getAttrs = (el) => {
180
- const out = {};
181
- for (const attr of ATTR_KEYS) {
182
- let val = el.getAttribute(attr);
183
- if (val == null || val === "")
184
- continue;
185
- if (attr === "href")
186
- val = resolveUrl(val);
187
- const key = attr.replace(/-/g, "_");
188
- out[key] = truncate(val);
189
- }
190
- if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement)
191
- && el.value) {
192
- out["value"] = truncate(el.value);
193
- }
194
- return out;
195
- };
196
- const getInnerText = (el) => (el.textContent ?? "").replace(/\s+/g, " ").trim().slice(0, 100);
197
- // B — buttons, checkboxes, radios, switches, role="button"
198
- // L — anchor links
199
- // I — inputs, textareas, selects
200
- // M — menuitem, option, tab, combobox, other interactables
201
- const categorize = (el) => {
202
- const tag = el.tagName.toLowerCase();
203
- const role = el.getAttribute("role") ?? "";
204
- const type = el.type ?? "";
205
- if (tag === "a" && el.hasAttribute("href"))
206
- return "L";
207
- if (tag === "textarea" || tag === "select")
208
- return "I";
209
- if (tag === "input" && !["submit", "button", "checkbox", "radio", "hidden"].includes(type))
210
- return "I";
211
- if (tag === "button")
212
- return "B";
213
- if (tag === "input" && ["submit", "button", "checkbox", "radio"].includes(type))
214
- return "B";
215
- if (role === "button" || role === "switch")
216
- return "B";
217
- if (["menuitem", "option", "tab", "combobox"].includes(role))
218
- return "M";
219
- return "M";
220
- };
221
- const interactiveSet = new Set();
222
- document.querySelectorAll(interactiveSel).forEach((el) => interactiveSet.add(el));
223
- // Only include text near interactive elements — headings, labels, or
224
- // nodes whose parent/grandparent contains an interactive child.
225
- const isNearInteractive = (parent) => {
226
- if (!parent)
227
- return false;
228
- const tag = parent.tagName.toLowerCase();
229
- if (["h1", "h2", "h3", "h4", "h5", "h6", "label", "legend", "figcaption", "caption", "th", "li"].includes(tag))
230
- return true;
231
- try {
232
- if (parent.querySelector(interactiveSel))
233
- return true;
234
- if (parent.parentElement?.querySelector(interactiveSel))
235
- return true;
236
- }
237
- catch { /* selector might fail on exotic DOMs */ }
238
- return false;
239
- };
240
- const SELF_CLOSING = new Set(["input", "img", "br", "hr"]);
241
- const lines = [];
242
- const elements = [];
243
- const counters = { B: 0, L: 0, I: 0, M: 0 };
244
- const walk = (node) => {
245
- if (node.nodeType === Node.TEXT_NODE) {
246
- const text = (node.textContent ?? "").replace(/\s+/g, " ").trim();
247
- if (text.length < 2)
248
- return;
249
- const parent = node.parentElement;
250
- if (!parent)
251
- return;
252
- if (parent.tagName === "SCRIPT" || parent.tagName === "STYLE" || parent.tagName === "NOSCRIPT")
253
- return;
254
- if (interactiveSet.has(parent))
255
- return; // text goes inside the tag
256
- if (!isVisible(parent))
257
- return;
258
- if (!isNearInteractive(parent))
259
- return;
260
- lines.push(`_[:] ${text.slice(0, 150)}`);
261
- return;
262
- }
263
- if (node.nodeType !== Node.ELEMENT_NODE)
264
- return;
265
- const el = node;
266
- const tag = el.tagName.toLowerCase();
267
- if (tag === "script" || tag === "style" || tag === "noscript")
268
- return;
269
- if (interactiveSet.has(el)) {
270
- if (!isVisible(el))
271
- return;
272
- const cat = categorize(el);
273
- counters[cat]++;
274
- const id = `${cat}${counters[cat]}`;
275
- const attrs = getAttrs(el);
276
- const text = getInnerText(el);
277
- const htmlTag = tag === "a" ? "a" : tag;
278
- // Mark element for Playwright selector lookup
279
- el.setAttribute("data-rid", id);
280
- const attrParts = [];
281
- for (const key of ATTR_RENDER_ORDER) {
282
- if (attrs[key])
283
- attrParts.push(`${key}="${attrs[key]}"`);
284
- }
285
- const attrStr = attrParts.length > 0 ? " " + attrParts.join(" ") : "";
286
- if (SELF_CLOSING.has(htmlTag)) {
287
- lines.push(`${id}[:] <${htmlTag}${attrStr}/>`);
288
- }
289
- else {
290
- lines.push(`${id}[:] <${htmlTag}${attrStr}>${text}</${htmlTag}>`);
291
- }
292
- elements.push({ id, selector: `[data-rid="${id}"]` });
293
- return; // don't recurse into interactive elements
294
- }
295
- for (const child of Array.from(el.childNodes)) {
296
- walk(child);
297
- }
298
- };
299
- walk(document.body);
300
- return { lines, elements };
301
- }, INTERACTIVE_SEL);
302
- this.lastInteractiveElements = new Map(result.elements.map((e) => [e.id, e.selector]));
303
- return {
304
- dom: result.lines.join("\n"),
305
- elements: result.elements,
306
- };
307
- }
308
- // ─── Intelligent DOM scrape — Notte-style content understanding ──────────────
309
- // Passes visible page text to Claude Sonnet and asks whether the goal data is present.
310
- // If not found, follows nextUrl suggestions up to maxHops times before giving up.
311
- // Caller must supply initialContent (from extractPageContent()) to avoid a double call.
312
- async scrapeWithGoal(goal, initialContent, seedValues, maxHops = 3, initialLinks) {
313
- let content = initialContent;
314
- let links = initialLinks;
315
- for (let hop = 0; hop <= maxHops; hop++) {
316
- const result = await this.ai.scrapePageContent(goal, content, seedValues, links);
317
- if (result.found || !result.nextUrl || hop === maxHops) {
318
- return result;
319
- }
320
- // Follow the suggested link and re-scrape
90
+ // ─── Interaction ──────────────────────────────────────────────────────────────
91
+ async clickById(elementId) {
92
+ // First try cached CSS selector (reliable)
93
+ const selector = this.observer.getSelector(elementId);
94
+ if (selector) {
321
95
  try {
322
- await this.goto(result.nextUrl);
323
- const page = await this.extractPageWithLinks();
324
- content = page.content;
325
- links = page.links;
96
+ await this.page.click(selector);
97
+ return;
326
98
  }
327
99
  catch {
328
- return result;
100
+ // Fall through to coordinate-based click
329
101
  }
330
102
  }
331
- return { found: false, data: null, nextUrl: null, reasoning: "max hops reached without finding data" };
103
+ const coords = await this.observer.resolveElementId(elementId);
104
+ if (!coords)
105
+ throw new Error(`Element ${elementId} not found in DOM`);
106
+ await this.page.mouse.click(coords.x, coords.y);
332
107
  }
333
- // ─── Interaction ──────────────────────────────────────────────────────────────
334
- // Click by element ID checks stored selectors from observeInteractiveDom()
335
- // first, then falls back to coordinate-based resolution from observe().
336
- async clickById(elementId) {
337
- const pagesBefore = this.page.context().pages().length;
338
- const selector = this.lastInteractiveElements.get(elementId);
108
+ async fillById(elementId, value) {
109
+ // First try cached CSS selector (reliable)
110
+ const selector = this.observer.getSelector(elementId);
339
111
  if (selector) {
340
- await this.page.locator(selector).click();
341
- }
342
- else {
343
- const coords = await this.observer.resolveElementId(elementId);
344
- if (!coords)
345
- throw new Error(`Element ${elementId} not found in DOM`);
346
- await this.page.mouse.click(coords.x, coords.y);
112
+ try {
113
+ await this.page.click(selector, { clickCount: 3 }); // select all
114
+ await this.page.keyboard.press("Backspace");
115
+ await this.page.type(selector, value, { delay: 80 });
116
+ return;
117
+ }
118
+ catch {
119
+ // Fall through to coordinate-based fill
120
+ }
347
121
  }
348
- await this.switchToNewTabIfOpened(pagesBefore);
349
- }
350
- // Fill input by element ID from observe()
351
- async fillById(elementId, value) {
352
122
  const coords = await this.observer.resolveElementId(elementId);
353
123
  if (!coords)
354
124
  throw new Error(`Element ${elementId} not found in DOM`);
355
125
  await this.page.mouse.click(coords.x, coords.y);
356
126
  await this.page.keyboard.press("Control+a");
357
- await this.page.keyboard.type(value);
127
+ await this.page.keyboard.type(value, { delay: 80 });
128
+ }
129
+ // ─── Direct selector interaction (for autocomplete clicks) ──────────────────
130
+ async clickSelector(selector) {
131
+ const el = await this.page.waitForSelector(selector, { timeout: 3_000, visible: true });
132
+ if (el) {
133
+ await el.click();
134
+ }
135
+ else {
136
+ throw new Error(`Selector ${selector} not found`);
137
+ }
358
138
  }
359
139
  // ─── Captured requests ────────────────────────────────────────────────────────
360
- // All captured requests sorted by relevance score
361
140
  getCapturedRequests() {
362
141
  return this.interceptor.getAll();
363
142
  }
364
- // Only useful XHR/fetch requests above score threshold
365
143
  getUsefulRequests(minScore = 1) {
366
144
  return this.interceptor.getUseful(minScore);
367
145
  }
368
- // Only requests that fired as a direct result of a user action
369
146
  getActionTriggeredRequests() {
370
147
  return this.interceptor.getActionTriggered();
371
148
  }
372
- // Listen to requests in real-time as they come in
373
149
  onRequest(handler) {
374
150
  this.interceptor.on("request", handler);
375
151
  }
152
+ hasSeedValueInResponses(seedValues) {
153
+ return this.interceptor.hasSeedValueInResponses(seedValues);
154
+ }
155
+ markActionTimestamp() {
156
+ this.interceptor.markActionTimestamp();
157
+ }
376
158
  // ─── act() — achieve a goal through multi-step navigation ────────────────────
377
159
  async act(goal, maxSteps = 10, seedValues) {
378
160
  const steps = [];
379
- const trajectory = [];
380
- let consecutiveFailures = 0;
381
- const MAX_CONSECUTIVE_FAILURES = 3;
161
+ const history = [];
382
162
  for (let i = 0; i < maxSteps; i++) {
383
- // ALWAYS observe first — no screenshot by default
384
163
  const observation = await this.observe();
385
- let completion;
164
+ let action;
386
165
  let usedScreenshot = false;
387
- // If observe() gives us enough elements, use text-based decision
388
166
  if (observation.elements.filter((e) => e.visible).length > 0) {
389
- completion = await this.ai.decideFromObservation(goal, observation, trajectory, i + 1, maxSteps, seedValues);
167
+ action = await this.ai.decideFromObservation(goal, observation, history, seedValues);
390
168
  }
391
169
  else {
392
- // Fallback: page has no readable elements (canvas, iframe, etc.) → screenshot
393
170
  const shot = await this.screenshot();
394
- completion = await this.ai.decideFromScreenshot(goal, shot.base64, trajectory, i + 1, maxSteps, seedValues);
171
+ action = await this.ai.decideFromScreenshot(goal, shot.base64, history, seedValues);
395
172
  usedScreenshot = true;
396
173
  }
397
- const { action, state } = completion;
398
174
  steps.push({ action, observeBefore: observation, usedScreenshot });
399
- // Track trajectory for multi-turn conversation (assistant response, then next user observation)
400
- trajectory.push({
401
- role: "assistant",
402
- content: JSON.stringify(completion),
403
- });
404
- // Track consecutive failures
405
- if (state.previous_goal_status === "failure") {
406
- consecutiveFailures++;
407
- if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
408
- return {
409
- success: false,
410
- steps,
411
- finalUrl: this.url,
412
- reason: `${MAX_CONSECUTIVE_FAILURES} consecutive failures: ${state.previous_goal_eval}`,
413
- };
414
- }
415
- }
416
- else if (state.previous_goal_status === "success") {
417
- consecutiveFailures = 0;
418
- }
419
- // Terminal states
175
+ history.push(`Step ${i + 1}: ${action.type} ${action.reason}`);
420
176
  if (action.type === "done") {
421
177
  return { success: true, steps, finalUrl: this.url, reason: action.reason };
422
178
  }
423
179
  if (action.type === "impossible") {
424
180
  return { success: false, steps, finalUrl: this.url, reason: action.reason };
425
181
  }
426
- // Execute action
427
- const actionSuccess = await this.executeAction(action);
428
- // Add execution result to trajectory
429
- const resultMsg = actionSuccess
430
- ? `Action '${action.type}' succeeded`
431
- : `Action '${action.type}' failed`;
432
- trajectory.push({ role: "user", content: resultMsg });
433
- // Short wait after interactions for page to settle
182
+ await this.executeAction(action);
434
183
  await this.page.waitForTimeout(500);
435
184
  }
436
185
  return {
@@ -441,10 +190,7 @@ class Session {
441
190
  };
442
191
  }
443
192
  // ─── findData() — navigate + capture requests until goal achieved ─────────────
444
- // seedValues: concrete example inputs to use during discovery (e.g. what to type in a search box)
445
193
  async findData(goal, maxSteps = 15, seedValues) {
446
- // Do NOT clear — requests captured during goto() (t=0) are valuable.
447
- // goto() already waits for networkidle + 800ms minimum, so no additional delay needed here.
448
194
  const actResult = await this.act(goal, maxSteps, seedValues);
449
195
  const capturedRequests = this.interceptor.getUseful();
450
196
  return {
@@ -455,84 +201,49 @@ class Session {
455
201
  // ─── Cleanup ──────────────────────────────────────────────────────────────────
456
202
  async close() {
457
203
  this.interceptor.stop();
458
- await this.page.close();
204
+ if (!this.page.isClosed()) {
205
+ await this.page.close();
206
+ }
459
207
  }
460
208
  async closeAll() {
461
209
  this.interceptor.stop();
462
- await this.browser.close();
463
- }
464
- // ─── New tab detection ─────────────────────────────────────────────────────────
465
- // After a click, check if a new tab was opened and switch the session to it.
466
- // Rebuilds interceptor/observer so they reference the new page.
467
- async switchToNewTabIfOpened(pagesBeforeCount) {
468
- await this.page.waitForTimeout(1000);
469
- const pages = this.page.context().pages();
470
- if (pages.length <= pagesBeforeCount)
471
- return;
472
- // Switch to the newest tab
473
- const newPage = pages[pages.length - 1];
474
- if (newPage === this.page)
475
- return;
476
- // Tear down interceptor on old page, rebuild on new page
477
- this.interceptor.stop();
478
- this.page = newPage;
479
- this.interceptor = new interceptor_1.Interceptor(this.page);
480
- this.observer = new observer_1.Observer(this.page);
481
- this.interceptor.start();
482
- try {
483
- await this.page.waitForLoadState("domcontentloaded", { timeout: this.defaultTimeout });
484
- }
485
- catch {
486
- // Partial load — continue with what we have
487
- }
210
+ await this.browserHandle.close();
488
211
  }
489
212
  // ─── Execute action ───────────────────────────────────────────────────────────
490
213
  async executeAction(action) {
491
- try {
492
- switch (action.type) {
493
- case "click":
494
- this.interceptor.markActionTimestamp();
495
- await this.clickById(action.elementId).catch(async () => {
496
- await this.page.getByText(action.elementId).first().click();
497
- });
498
- break;
499
- case "fill":
500
- this.interceptor.markActionTimestamp();
501
- await this.fillById(action.elementId, action.value).catch(async () => {
502
- await this.page.getByRole("textbox").first().fill(action.value);
503
- });
504
- break;
505
- case "scroll":
506
- this.interceptor.markActionTimestamp();
507
- await this.page.evaluate(({ direction, amount }) => {
508
- window.scrollBy(0, direction === "down" ? amount : -amount);
509
- }, { direction: action.direction, amount: action.amount ?? 300 }).catch(() => {
510
- // Execution context destroyed = page navigated mid-scroll. Not a crash — continue.
511
- });
512
- break;
513
- case "navigate":
514
- this.interceptor.markActionTimestamp();
515
- await this.goto(action.url);
516
- break;
517
- case "press_key":
518
- await this.page.keyboard.press(action.key);
519
- break;
520
- case "wait":
521
- await this.page.waitForTimeout(action.ms);
522
- break;
523
- case "scrape":
524
- // Scrape is handled by the caller — no-op here
525
- break;
526
- case "screenshot":
527
- // Already handled in act() loop
528
- break;
529
- default:
530
- break;
531
- }
532
- return true;
533
- }
534
- catch {
535
- return false;
214
+ switch (action.type) {
215
+ case "click":
216
+ this.interceptor.markActionTimestamp();
217
+ await this.clickById(action.elementId).catch(() => {
218
+ // If all resolution fails, log and continue
219
+ console.warn(`[Session] Click failed for ${action.elementId}`);
220
+ });
221
+ break;
222
+ case "fill":
223
+ this.interceptor.markActionTimestamp();
224
+ await this.fillById(action.elementId, action.value).catch(() => {
225
+ console.warn(`[Session] Fill failed for ${action.elementId}`);
226
+ });
227
+ break;
228
+ case "scroll":
229
+ this.interceptor.markActionTimestamp();
230
+ await this.page.evaluate(({ direction, amount }) => {
231
+ window.scrollBy(0, direction === "down" ? amount : -amount);
232
+ }, { direction: action.direction, amount: action.amount ?? 300 }).catch(() => {
233
+ // Execution context destroyed = page navigated mid-scroll
234
+ });
235
+ break;
236
+ case "navigate":
237
+ this.interceptor.markActionTimestamp();
238
+ await this.page.goto(action.url, { waitUntil: "domcontentloaded", timeout: this.defaultTimeout });
239
+ break;
240
+ case "wait":
241
+ await this.page.waitForTimeout(action.ms);
242
+ break;
243
+ case "screenshot":
244
+ break;
245
+ default:
246
+ break;
536
247
  }
537
248
  }
538
249
  }