zan-browser 1.3.38 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/ai.d.ts +3 -16
  2. package/dist/ai.d.ts.map +1 -1
  3. package/dist/ai.js +101 -208
  4. package/dist/ai.js.map +1 -1
  5. package/dist/blacklist.d.ts +2 -0
  6. package/dist/blacklist.d.ts.map +1 -0
  7. package/dist/blacklist.js +115 -0
  8. package/dist/blacklist.js.map +1 -0
  9. package/dist/browser-provider.d.ts +81 -0
  10. package/dist/browser-provider.d.ts.map +1 -0
  11. package/dist/browser-provider.js +6 -0
  12. package/dist/browser-provider.js.map +1 -0
  13. package/dist/browser.d.ts +1 -1
  14. package/dist/browser.d.ts.map +1 -1
  15. package/dist/browser.js +57 -59
  16. package/dist/browser.js.map +1 -1
  17. package/dist/index.d.ts +7 -1
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +5 -1
  20. package/dist/index.js.map +1 -1
  21. package/dist/interceptor.d.ts +6 -3
  22. package/dist/interceptor.d.ts.map +1 -1
  23. package/dist/interceptor.js +61 -21
  24. package/dist/interceptor.js.map +1 -1
  25. package/dist/navigator.d.ts +40 -0
  26. package/dist/navigator.d.ts.map +1 -0
  27. package/dist/navigator.js +506 -0
  28. package/dist/navigator.js.map +1 -0
  29. package/dist/observer.d.ts +23 -3
  30. package/dist/observer.d.ts.map +1 -1
  31. package/dist/observer.js +310 -270
  32. package/dist/observer.js.map +1 -1
  33. package/dist/perception.d.ts +42 -0
  34. package/dist/perception.d.ts.map +1 -0
  35. package/dist/perception.js +140 -0
  36. package/dist/perception.js.map +1 -0
  37. package/dist/providers/browserbase.d.ts +12 -0
  38. package/dist/providers/browserbase.d.ts.map +1 -0
  39. package/dist/providers/browserbase.js +226 -0
  40. package/dist/providers/browserbase.js.map +1 -0
  41. package/dist/providers/puppeteer-local.d.ts +5 -0
  42. package/dist/providers/puppeteer-local.d.ts.map +1 -0
  43. package/dist/providers/puppeteer-local.js +218 -0
  44. package/dist/providers/puppeteer-local.js.map +1 -0
  45. package/dist/session.d.ts +17 -23
  46. package/dist/session.d.ts.map +1 -1
  47. package/dist/session.js +123 -407
  48. package/dist/session.js.map +1 -1
  49. package/dist/types.d.ts +1 -32
  50. package/dist/types.d.ts.map +1 -1
  51. package/package.json +3 -2
package/dist/session.js CHANGED
@@ -5,63 +5,44 @@ const interceptor_1 = require("./interceptor");
5
5
  const observer_1 = require("./observer");
6
6
  class Session {
7
7
  page;
8
- browser;
8
+ browserHandle;
9
9
  interceptor;
10
10
  observer;
11
11
  ai;
12
12
  defaultTimeout;
13
- lastInteractiveElements = new Map();
14
- viewerUrl; // Browserbase live viewer URL
15
- constructor(page, browser, ai, timeout = 30_000, viewerUrl) {
13
+ viewerUrl;
14
+ constructor(page, browserHandle, ai, timeout = 30_000, viewerUrl) {
16
15
  this.page = page;
17
- this.browser = browser;
16
+ this.browserHandle = browserHandle;
18
17
  this.ai = ai;
19
18
  this.defaultTimeout = timeout;
20
19
  this.viewerUrl = viewerUrl;
21
20
  this.interceptor = new interceptor_1.Interceptor(page);
22
21
  this.observer = new observer_1.Observer(page);
23
22
  // Start capturing traffic immediately — t=0
24
- // This catches requests the site fires on load before we do anything
25
23
  this.interceptor.start();
26
24
  }
25
+ // ─── Accessors for SmartNavigator ───────────────────────────────────────────
26
+ getPage() { return this.page; }
27
+ getInterceptor() { return this.interceptor; }
28
+ getObserver() { return this.observer; }
29
+ getAI() { return this.ai; }
27
30
  // ─── Navigation ───────────────────────────────────────────────────────────────
28
31
  async goto(url, options) {
29
32
  const timeout = options?.timeout ?? this.defaultTimeout;
30
- console.log(`[goto] navigating to: ${url} (timeout: ${timeout}ms)`);
31
- try {
32
- await this.page.goto(url, {
33
- waitUntil: "domcontentloaded",
34
- timeout,
35
- });
36
- console.log(`[goto] domcontentloaded ok — url: ${this.page.url()}, tabs: ${this.page.context().pages().length}`);
37
- }
38
- catch (err) {
39
- console.log(`[goto] page.goto failed: ${err.message}`);
40
- // On timeout, don't destroy the session — wait for network idle on the
41
- // partially loaded page, then continue if the URL actually changed.
42
- try {
43
- await this.page.waitForLoadState("networkidle", { timeout: 5_000 });
44
- console.log(`[goto] networkidle fallback ok — url: ${this.page.url()}, tabs: ${this.page.context().pages().length}`);
45
- }
46
- catch (err2) {
47
- console.log(`[goto] networkidle fallback also timed out: ${err2.message}`);
48
- }
49
- const currentUrl = this.page.url();
50
- if (currentUrl === "about:blank" || currentUrl === "") {
51
- console.log(`[goto] FATAL: still on about:blank — throwing`);
52
- throw new Error(`Navigation to ${url} failed: page is still on about:blank — ${err.message}`);
53
- }
54
- console.log(`[goto] partial load accepted — url: ${currentUrl}, tabs: ${this.page.context().pages().length}`);
55
- }
33
+ await this.page.goto(url, {
34
+ waitUntil: "domcontentloaded",
35
+ timeout,
36
+ });
56
37
  // Wait for network to go idle — catches XHR that SPAs fire after DOM is parsed.
57
38
  try {
58
- await this.page.waitForLoadState("networkidle", { timeout: 8_000 });
39
+ await this.page.waitForNetworkIdle({ timeout: 8_000 });
59
40
  }
60
41
  catch {
61
42
  // networkidle timed out — page likely has background polling, continue anyway
62
43
  }
44
+ // Minimum 800ms after networkidle (or timeout) — some SPAs fire XHR slightly after idle
63
45
  await this.page.waitForTimeout(800);
64
- console.log(`[goto] done — final url: ${this.page.url()}, tabs: ${this.page.context().pages().length}`);
65
46
  }
66
47
  async wait(ms) {
67
48
  await this.page.waitForTimeout(ms);
@@ -71,21 +52,28 @@ class Session {
71
52
  }
72
53
  // ─── Observe — always first, before any action ────────────────────────────────
73
54
  async observe() {
74
- this.lastInteractiveElements.clear();
75
55
  return this.observer.observe();
76
56
  }
77
- // ─── Screenshot only when observe() is not enough ──────────────────────────
57
+ // ─── Form and autocomplete detection ────────────────────────────────────────
58
+ async detectForms() {
59
+ return this.observer.detectForms();
60
+ }
61
+ async detectAutocomplete() {
62
+ return this.observer.detectAutocomplete();
63
+ }
64
+ async detectErrors() {
65
+ return this.observer.detectErrors();
66
+ }
67
+ // ─── Screenshot ────────────────────────────────────────────────────────────────
78
68
  async screenshot() {
79
- const buffer = await this.page.screenshot({ type: "jpeg", quality: 80 });
69
+ const result = await this.page.screenshot({ type: "jpeg", quality: 80, encoding: "base64" });
80
70
  return {
81
- base64: buffer.toString("base64"),
71
+ base64: typeof result === "string" ? result : result.toString("base64"),
82
72
  mimeType: "image/jpeg",
83
73
  timestamp: Date.now(),
84
74
  };
85
75
  }
86
76
  // ─── DOM text extraction — fallback when no useful XHR is captured ────────────
87
- // Strips chrome (nav, footer, scripts) and returns visible text capped at 15k chars.
88
- // Used by web-agent when the page renders data server-side with no API calls.
89
77
  async extractPageContent() {
90
78
  return await this.page.evaluate(() => {
91
79
  const clone = document.body.cloneNode(true);
@@ -99,344 +87,99 @@ class Session {
99
87
  .slice(0, 15000);
100
88
  });
101
89
  }
102
- // ─── DOM text + links extraction — for link-aware intelligent navigation ─────
103
- // Returns visible text (same as extractPageContent) plus all <a> links on the page.
104
- // Used by the caller to feed real links into AI so it picks from actual URLs, not guesses.
105
- async extractPageWithLinks() {
106
- return await this.page.evaluate(() => {
107
- const clone = document.body.cloneNode(true);
108
- clone.querySelectorAll("script,style,nav,footer,header")
109
- .forEach((el) => el.remove());
110
- const content = clone.innerText
111
- .split("\n")
112
- .map((l) => l.trim())
113
- .filter((l) => l.length > 0)
114
- .join("\n")
115
- .slice(0, 12000);
116
- const links = [];
117
- const seen = new Set();
118
- document.querySelectorAll("a[href]").forEach((a) => {
119
- const href = a.href;
120
- const text = a.innerText.trim().slice(0, 100);
121
- if (href && text && !seen.has(href) && href.startsWith("http")) {
122
- seen.add(href);
123
- links.push({ href, text });
124
- }
125
- });
126
- return { content, links: links.slice(0, 80) };
127
- });
128
- }
129
- // ─── Interactive DOM observation — full interactive element scan ─────────────
130
- // Replaces extractPageWithLinks() for agent navigation: returns ALL interactive
131
- // elements (buttons, menus, dropdowns, inputs) not just anchor links.
132
- // Sets data-rid attributes on matched elements so clickById() can target them.
133
- async observeInteractiveDom() {
134
- const INTERACTIVE_SEL = [
135
- "button", "[role='button']", "[role='switch']",
136
- "input:not([type='hidden'])",
137
- "a[href]", "select", "textarea",
138
- "[role='menuitem']", "[role='option']", "[role='tab']", "[role='combobox']",
139
- "[onclick]", "[tabindex]:not([tabindex='-1'])",
140
- ].join(", ");
141
- const result = await this.page.evaluate((interactiveSel) => {
142
- // Clean previous markers
143
- document.querySelectorAll("[data-rid]").forEach((el) => el.removeAttribute("data-rid"));
144
- const isVisible = (el) => {
145
- const htmlEl = el;
146
- // offsetParent is null when the element or any ancestor has display:none.
147
- // Exception: <body>, <html>, and position:fixed/sticky elements legitimately have offsetParent === null.
148
- if (htmlEl.offsetParent === null) {
149
- const style = window.getComputedStyle(el);
150
- const pos = style.position;
151
- if (pos !== "fixed" && pos !== "sticky" && el.tagName !== "BODY" && el.tagName !== "HTML") {
152
- return false;
153
- }
154
- }
155
- const style = window.getComputedStyle(el);
156
- if (style.visibility === "hidden" || style.opacity === "0")
157
- return false;
158
- const rect = el.getBoundingClientRect();
159
- if (rect.width === 0 || rect.height === 0)
160
- return false;
161
- // Check if element is within viewport bounds
162
- const vh = window.innerHeight;
163
- const vw = window.innerWidth;
164
- return rect.bottom > 0 && rect.top < vh && rect.right > 0 && rect.left < vw;
165
- };
166
- const truncate = (v, max = 60) => v.length > max ? v.slice(0, max) : v;
167
- const resolveUrl = (href) => {
168
- if (!href)
169
- return href;
170
- try {
171
- return new URL(href, window.location.href).href;
172
- }
173
- catch {
174
- return href;
175
- }
176
- };
177
- const ATTR_KEYS = [
178
- "title", "type", "name", "role", "aria-label",
179
- "placeholder", "value", "alt", "href", "aria-expanded",
180
- ];
181
- const ATTR_RENDER_ORDER = [
182
- "title", "type", "name", "role", "aria_label",
183
- "placeholder", "value", "alt", "href", "aria_expanded",
184
- ];
185
- const getAttrs = (el) => {
186
- const out = {};
187
- for (const attr of ATTR_KEYS) {
188
- let val = el.getAttribute(attr);
189
- if (val == null || val === "")
190
- continue;
191
- if (attr === "href")
192
- val = resolveUrl(val);
193
- const key = attr.replace(/-/g, "_");
194
- out[key] = truncate(val);
195
- }
196
- if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement)
197
- && el.value) {
198
- out["value"] = truncate(el.value);
199
- }
200
- return out;
201
- };
202
- const getInnerText = (el) => (el.textContent ?? "").replace(/\s+/g, " ").trim().slice(0, 100);
203
- // B — buttons, checkboxes, radios, switches, role="button"
204
- // L — anchor links
205
- // I — inputs, textareas, selects
206
- // M — menuitem, option, tab, combobox, other interactables
207
- const categorize = (el) => {
208
- const tag = el.tagName.toLowerCase();
209
- const role = el.getAttribute("role") ?? "";
210
- const type = el.type ?? "";
211
- if (tag === "a" && el.hasAttribute("href"))
212
- return "L";
213
- if (tag === "textarea" || tag === "select")
214
- return "I";
215
- if (tag === "input" && !["submit", "button", "checkbox", "radio", "hidden"].includes(type))
216
- return "I";
217
- if (tag === "button")
218
- return "B";
219
- if (tag === "input" && ["submit", "button", "checkbox", "radio"].includes(type))
220
- return "B";
221
- if (role === "button" || role === "switch")
222
- return "B";
223
- if (["menuitem", "option", "tab", "combobox"].includes(role))
224
- return "M";
225
- return "M";
226
- };
227
- const interactiveSet = new Set();
228
- document.querySelectorAll(interactiveSel).forEach((el) => interactiveSet.add(el));
229
- // Only include text near interactive elements — headings, labels, or
230
- // nodes whose parent/grandparent contains an interactive child.
231
- const isNearInteractive = (parent) => {
232
- if (!parent)
233
- return false;
234
- const tag = parent.tagName.toLowerCase();
235
- if (["h1", "h2", "h3", "h4", "h5", "h6", "label", "legend", "figcaption", "caption", "th", "li"].includes(tag))
236
- return true;
237
- try {
238
- if (parent.querySelector(interactiveSel))
239
- return true;
240
- if (parent.parentElement?.querySelector(interactiveSel))
241
- return true;
242
- }
243
- catch { /* selector might fail on exotic DOMs */ }
244
- return false;
245
- };
246
- const SELF_CLOSING = new Set(["input", "img", "br", "hr"]);
247
- const lines = [];
248
- const elements = [];
249
- const counters = { B: 0, L: 0, I: 0, M: 0 };
250
- const walk = (node) => {
251
- if (node.nodeType === Node.TEXT_NODE) {
252
- const text = (node.textContent ?? "").replace(/\s+/g, " ").trim();
253
- if (text.length < 2)
254
- return;
255
- const parent = node.parentElement;
256
- if (!parent)
257
- return;
258
- if (parent.tagName === "SCRIPT" || parent.tagName === "STYLE" || parent.tagName === "NOSCRIPT")
259
- return;
260
- if (interactiveSet.has(parent))
261
- return; // text goes inside the tag
262
- if (!isVisible(parent))
263
- return;
264
- if (!isNearInteractive(parent))
265
- return;
266
- lines.push(`_[:] ${text.slice(0, 150)}`);
267
- return;
268
- }
269
- if (node.nodeType !== Node.ELEMENT_NODE)
270
- return;
271
- const el = node;
272
- const tag = el.tagName.toLowerCase();
273
- if (tag === "script" || tag === "style" || tag === "noscript")
274
- return;
275
- if (interactiveSet.has(el)) {
276
- if (!isVisible(el))
277
- return;
278
- const cat = categorize(el);
279
- counters[cat]++;
280
- const id = `${cat}${counters[cat]}`;
281
- const attrs = getAttrs(el);
282
- const text = getInnerText(el);
283
- const htmlTag = tag === "a" ? "a" : tag;
284
- // Mark element for Playwright selector lookup
285
- el.setAttribute("data-rid", id);
286
- const attrParts = [];
287
- for (const key of ATTR_RENDER_ORDER) {
288
- if (attrs[key])
289
- attrParts.push(`${key}="${attrs[key]}"`);
290
- }
291
- const attrStr = attrParts.length > 0 ? " " + attrParts.join(" ") : "";
292
- if (SELF_CLOSING.has(htmlTag)) {
293
- lines.push(`${id}[:] <${htmlTag}${attrStr}/>`);
294
- }
295
- else {
296
- lines.push(`${id}[:] <${htmlTag}${attrStr}>${text}</${htmlTag}>`);
297
- }
298
- elements.push({ id, selector: `[data-rid="${id}"]` });
299
- return; // don't recurse into interactive elements
300
- }
301
- for (const child of Array.from(el.childNodes)) {
302
- walk(child);
303
- }
304
- };
305
- walk(document.body);
306
- return { lines, elements };
307
- }, INTERACTIVE_SEL);
308
- this.lastInteractiveElements = new Map(result.elements.map((e) => [e.id, e.selector]));
309
- return {
310
- dom: result.lines.join("\n"),
311
- elements: result.elements,
312
- };
313
- }
314
- // ─── Intelligent DOM scrape — Notte-style content understanding ──────────────
315
- // Passes visible page text to Claude Sonnet and asks whether the goal data is present.
316
- // If not found, follows nextUrl suggestions up to maxHops times before giving up.
317
- // Caller must supply initialContent (from extractPageContent()) to avoid a double call.
318
- async scrapeWithGoal(goal, initialContent, seedValues, maxHops = 3, initialLinks) {
319
- let content = initialContent;
320
- let links = initialLinks;
321
- for (let hop = 0; hop <= maxHops; hop++) {
322
- const result = await this.ai.scrapePageContent(goal, content, seedValues, links);
323
- if (result.found || !result.nextUrl || hop === maxHops) {
324
- return result;
325
- }
326
- // Follow the suggested link and re-scrape
90
+ // ─── Interaction ──────────────────────────────────────────────────────────────
91
+ async clickById(elementId) {
92
+ // First try cached CSS selector (reliable)
93
+ const selector = this.observer.getSelector(elementId);
94
+ if (selector) {
327
95
  try {
328
- await this.goto(result.nextUrl);
329
- const page = await this.extractPageWithLinks();
330
- content = page.content;
331
- links = page.links;
96
+ await this.page.click(selector);
97
+ return;
332
98
  }
333
99
  catch {
334
- return result;
100
+ // Fall through to coordinate-based click
335
101
  }
336
102
  }
337
- return { found: false, data: null, nextUrl: null, reasoning: "max hops reached without finding data" };
103
+ const coords = await this.observer.resolveElementId(elementId);
104
+ if (!coords)
105
+ throw new Error(`Element ${elementId} not found in DOM`);
106
+ await this.page.mouse.click(coords.x, coords.y);
338
107
  }
339
- // ─── Interaction ──────────────────────────────────────────────────────────────
340
- // Click by element ID checks stored selectors from observeInteractiveDom()
341
- // first, then falls back to coordinate-based resolution from observe().
342
- async clickById(elementId) {
343
- const pagesBefore = this.page.context().pages().length;
344
- const selector = this.lastInteractiveElements.get(elementId);
108
+ async fillById(elementId, value) {
109
+ // First try cached CSS selector (reliable)
110
+ const selector = this.observer.getSelector(elementId);
345
111
  if (selector) {
346
- await this.page.locator(selector).click();
347
- }
348
- else {
349
- const coords = await this.observer.resolveElementId(elementId);
350
- if (!coords)
351
- throw new Error(`Element ${elementId} not found in DOM`);
352
- await this.page.mouse.click(coords.x, coords.y);
112
+ try {
113
+ await this.page.click(selector, { clickCount: 3 }); // select all
114
+ await this.page.keyboard.press("Backspace");
115
+ await this.page.type(selector, value, { delay: 80 });
116
+ return;
117
+ }
118
+ catch {
119
+ // Fall through to coordinate-based fill
120
+ }
353
121
  }
354
- await this.switchToNewTabIfOpened(pagesBefore);
355
- }
356
- // Fill input by element ID from observe()
357
- async fillById(elementId, value) {
358
122
  const coords = await this.observer.resolveElementId(elementId);
359
123
  if (!coords)
360
124
  throw new Error(`Element ${elementId} not found in DOM`);
361
125
  await this.page.mouse.click(coords.x, coords.y);
362
126
  await this.page.keyboard.press("Control+a");
363
- await this.page.keyboard.type(value);
127
+ await this.page.keyboard.type(value, { delay: 80 });
128
+ }
129
+ // ─── Direct selector interaction (for autocomplete clicks) ──────────────────
130
+ async clickSelector(selector) {
131
+ const el = await this.page.waitForSelector(selector, { timeout: 3_000, visible: true });
132
+ if (el) {
133
+ await el.click();
134
+ }
135
+ else {
136
+ throw new Error(`Selector ${selector} not found`);
137
+ }
364
138
  }
365
139
  // ─── Captured requests ────────────────────────────────────────────────────────
366
- // All captured requests sorted by relevance score
367
140
  getCapturedRequests() {
368
141
  return this.interceptor.getAll();
369
142
  }
370
- // Only useful XHR/fetch requests above score threshold
371
143
  getUsefulRequests(minScore = 1) {
372
144
  return this.interceptor.getUseful(minScore);
373
145
  }
374
- // Only requests that fired as a direct result of a user action
375
146
  getActionTriggeredRequests() {
376
147
  return this.interceptor.getActionTriggered();
377
148
  }
378
- // Listen to requests in real-time as they come in
379
149
  onRequest(handler) {
380
150
  this.interceptor.on("request", handler);
381
151
  }
152
+ hasSeedValueInResponses(seedValues) {
153
+ return this.interceptor.hasSeedValueInResponses(seedValues);
154
+ }
155
+ markActionTimestamp() {
156
+ this.interceptor.markActionTimestamp();
157
+ }
382
158
  // ─── act() — achieve a goal through multi-step navigation ────────────────────
383
159
  async act(goal, maxSteps = 10, seedValues) {
384
160
  const steps = [];
385
- const trajectory = [];
386
- let consecutiveFailures = 0;
387
- const MAX_CONSECUTIVE_FAILURES = 3;
161
+ const history = [];
388
162
  for (let i = 0; i < maxSteps; i++) {
389
- // ALWAYS observe first — no screenshot by default
390
163
  const observation = await this.observe();
391
- let completion;
164
+ let action;
392
165
  let usedScreenshot = false;
393
- // If observe() gives us enough elements, use text-based decision
394
166
  if (observation.elements.filter((e) => e.visible).length > 0) {
395
- completion = await this.ai.decideFromObservation(goal, observation, trajectory, i + 1, maxSteps, seedValues);
167
+ action = await this.ai.decideFromObservation(goal, observation, history, seedValues);
396
168
  }
397
169
  else {
398
- // Fallback: page has no readable elements (canvas, iframe, etc.) → screenshot
399
170
  const shot = await this.screenshot();
400
- completion = await this.ai.decideFromScreenshot(goal, shot.base64, trajectory, i + 1, maxSteps, seedValues);
171
+ action = await this.ai.decideFromScreenshot(goal, shot.base64, history, seedValues);
401
172
  usedScreenshot = true;
402
173
  }
403
- const { action, state } = completion;
404
174
  steps.push({ action, observeBefore: observation, usedScreenshot });
405
- // Track trajectory for multi-turn conversation (assistant response, then next user observation)
406
- trajectory.push({
407
- role: "assistant",
408
- content: JSON.stringify(completion),
409
- });
410
- // Track consecutive failures
411
- if (state.previous_goal_status === "failure") {
412
- consecutiveFailures++;
413
- if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
414
- return {
415
- success: false,
416
- steps,
417
- finalUrl: this.url,
418
- reason: `${MAX_CONSECUTIVE_FAILURES} consecutive failures: ${state.previous_goal_eval}`,
419
- };
420
- }
421
- }
422
- else if (state.previous_goal_status === "success") {
423
- consecutiveFailures = 0;
424
- }
425
- // Terminal states
175
+ history.push(`Step ${i + 1}: ${action.type} ${action.reason}`);
426
176
  if (action.type === "done") {
427
177
  return { success: true, steps, finalUrl: this.url, reason: action.reason };
428
178
  }
429
179
  if (action.type === "impossible") {
430
180
  return { success: false, steps, finalUrl: this.url, reason: action.reason };
431
181
  }
432
- // Execute action
433
- const actionSuccess = await this.executeAction(action);
434
- // Add execution result to trajectory
435
- const resultMsg = actionSuccess
436
- ? `Action '${action.type}' succeeded`
437
- : `Action '${action.type}' failed`;
438
- trajectory.push({ role: "user", content: resultMsg });
439
- // Short wait after interactions for page to settle
182
+ await this.executeAction(action);
440
183
  await this.page.waitForTimeout(500);
441
184
  }
442
185
  return {
@@ -447,10 +190,7 @@ class Session {
447
190
  };
448
191
  }
449
192
  // ─── findData() — navigate + capture requests until goal achieved ─────────────
450
- // seedValues: concrete example inputs to use during discovery (e.g. what to type in a search box)
451
193
  async findData(goal, maxSteps = 15, seedValues) {
452
- // Do NOT clear — requests captured during goto() (t=0) are valuable.
453
- // goto() already waits for networkidle + 800ms minimum, so no additional delay needed here.
454
194
  const actResult = await this.act(goal, maxSteps, seedValues);
455
195
  const capturedRequests = this.interceptor.getUseful();
456
196
  return {
@@ -461,84 +201,60 @@ class Session {
461
201
  // ─── Cleanup ──────────────────────────────────────────────────────────────────
462
202
  async close() {
463
203
  this.interceptor.stop();
464
- await this.page.close();
204
+ if (!this.page.isClosed()) {
205
+ await this.page.close();
206
+ }
465
207
  }
466
208
  async closeAll() {
467
209
  this.interceptor.stop();
468
- await this.browser.close();
469
- }
470
- // ─── New tab detection ─────────────────────────────────────────────────────────
471
- // After a click, check if a new tab was opened and switch the session to it.
472
- // Rebuilds interceptor/observer so they reference the new page.
473
- async switchToNewTabIfOpened(pagesBeforeCount) {
474
- await this.page.waitForTimeout(1000);
475
- const pages = this.page.context().pages();
476
- if (pages.length <= pagesBeforeCount)
477
- return;
478
- // Switch to the newest tab
479
- const newPage = pages[pages.length - 1];
480
- if (newPage === this.page)
481
- return;
482
- // Tear down interceptor on old page, rebuild on new page
483
- this.interceptor.stop();
484
- this.page = newPage;
485
- this.interceptor = new interceptor_1.Interceptor(this.page);
486
- this.observer = new observer_1.Observer(this.page);
487
- this.interceptor.start();
488
- try {
489
- await this.page.waitForLoadState("domcontentloaded", { timeout: this.defaultTimeout });
490
- }
491
- catch {
492
- // Partial load — continue with what we have
493
- }
210
+ await this.browserHandle.close();
494
211
  }
495
212
  // ─── Execute action ───────────────────────────────────────────────────────────
496
213
  async executeAction(action) {
497
- try {
498
- switch (action.type) {
499
- case "click":
500
- this.interceptor.markActionTimestamp();
501
- await this.clickById(action.elementId).catch(async () => {
502
- await this.page.getByText(action.elementId).first().click();
503
- });
504
- break;
505
- case "fill":
506
- this.interceptor.markActionTimestamp();
507
- await this.fillById(action.elementId, action.value).catch(async () => {
508
- await this.page.getByRole("textbox").first().fill(action.value);
509
- });
510
- break;
511
- case "scroll":
512
- this.interceptor.markActionTimestamp();
513
- await this.page.evaluate(({ direction, amount }) => {
514
- window.scrollBy(0, direction === "down" ? amount : -amount);
515
- }, { direction: action.direction, amount: action.amount ?? 300 }).catch(() => {
516
- // Execution context destroyed = page navigated mid-scroll. Not a crash — continue.
517
- });
518
- break;
519
- case "navigate":
520
- this.interceptor.markActionTimestamp();
521
- await this.goto(action.url);
522
- break;
523
- case "press_key":
524
- await this.page.keyboard.press(action.key);
525
- break;
526
- case "wait":
527
- await this.page.waitForTimeout(action.ms);
528
- break;
529
- case "scrape":
530
- // Scrape is handled by the caller — no-op here
531
- break;
532
- case "screenshot":
533
- // Already handled in act() loop
534
- break;
535
- default:
536
- break;
537
- }
538
- return true;
539
- }
540
- catch {
541
- return false;
214
+ switch (action.type) {
215
+ case "click":
216
+ this.interceptor.markActionTimestamp();
217
+ await this.clickById(action.elementId).catch(() => {
218
+ // If all resolution fails, log and continue
219
+ console.warn(`[Session] Click failed for ${action.elementId}`);
220
+ });
221
+ break;
222
+ case "fill":
223
+ this.interceptor.markActionTimestamp();
224
+ await this.fillById(action.elementId, action.value).catch(() => {
225
+ console.warn(`[Session] Fill failed for ${action.elementId}`);
226
+ });
227
+ break;
228
+ case "scroll":
229
+ this.interceptor.markActionTimestamp();
230
+ await this.page.evaluate(({ direction, amount }) => {
231
+ window.scrollBy(0, direction === "down" ? amount : -amount);
232
+ }, { direction: action.direction, amount: action.amount ?? 300 }).catch(() => {
233
+ // Execution context destroyed = page navigated mid-scroll
234
+ });
235
+ break;
236
+ case "navigate":
237
+ this.interceptor.markActionTimestamp();
238
+ try {
239
+ await this.page.goto(action.url, { waitUntil: "domcontentloaded", timeout: this.defaultTimeout });
240
+ // Wait for network idle like Session.goto() does
241
+ try {
242
+ await this.page.waitForNetworkIdle({ timeout: 8_000 });
243
+ }
244
+ catch { /* background polling — continue */ }
245
+ await this.page.waitForTimeout(800);
246
+ }
247
+ catch (err) {
248
+ console.warn(`[Session] Navigate failed for ${action.url}:`, err.message);
249
+ }
250
+ break;
251
+ case "wait":
252
+ await this.page.waitForTimeout(action.ms);
253
+ break;
254
+ case "screenshot":
255
+ break;
256
+ default:
257
+ break;
542
258
  }
543
259
  }
544
260
  }