barebrowse 0.7.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -8,7 +8,7 @@
8
8
  * const snapshot = await browse('https://example.com');
9
9
  */
10
10
 
11
- import { launch } from './chromium.js';
11
+ import { launch, attach, cleanupBrowser } from './chromium.js';
12
12
  import { createCDP } from './cdp.js';
13
13
  import { formatTree } from './aria.js';
14
14
  import { authenticate } from './auth.js';
@@ -16,6 +16,8 @@ import { prune as pruneTree } from './prune.js';
16
16
  import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress, hover as cdpHover, select as cdpSelect, drag as cdpDrag, upload as cdpUpload } from './interact.js';
17
17
  import { dismissConsent } from './consent.js';
18
18
  import { applyStealth } from './stealth.js';
19
+ import { waitForNetworkIdle } from './network-idle.js';
20
+ import { join as pathJoin } from 'node:path';
19
21
 
20
22
  /**
21
23
  * Browse a URL and return an ARIA snapshot.
@@ -35,15 +37,18 @@ export async function browse(url, opts = {}) {
35
37
 
36
38
  let browser = null;
37
39
  let cdp = null;
40
+ // Forward caller-supplied launch knobs (binary, userDataDir, proxy) into
41
+ // every launch() call below, including hybrid-fallback re-launches.
42
+ const launchOpts = { proxy: opts.proxy, binary: opts.binary, userDataDir: opts.userDataDir };
38
43
 
39
44
  try {
40
45
  // Step 1: Get a CDP connection
41
46
  if (mode === 'headed') {
42
- browser = await launch({ headed: true, proxy: opts.proxy });
47
+ browser = await launch({ ...launchOpts, headed: true });
43
48
  cdp = await createCDP(browser.wsUrl);
44
49
  } else {
45
50
  // headless or hybrid (start headless)
46
- browser = await launch({ proxy: opts.proxy });
51
+ browser = await launch(launchOpts);
47
52
  cdp = await createCDP(browser.wsUrl);
48
53
  }
49
54
 
@@ -77,10 +82,10 @@ export async function browse(url, opts = {}) {
77
82
  if (mode === 'hybrid' && isChallengePage(tree, nodeCount)) {
78
83
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
79
84
  cdp.close();
80
- if (browser) { browser.process.kill(); browser = null; }
85
+ await cleanupBrowser(browser); browser = null;
81
86
 
82
87
  try {
83
- browser = await launch({ headed: true, proxy: opts.proxy });
88
+ browser = await launch({ ...launchOpts, headed: true });
84
89
  cdp = await createCDP(browser.wsUrl);
85
90
  page = await createPage(cdp, false, { viewport: opts.viewport });
86
91
  await suppressPermissions(cdp);
@@ -105,7 +110,11 @@ export async function browse(url, opts = {}) {
105
110
  snapshot = raw;
106
111
  }
107
112
  const stats = `url: ${url}\n${raw.length.toLocaleString()} chars → ${snapshot.length.toLocaleString()} chars (${Math.round((1 - snapshot.length / raw.length) * 100)}% pruned)`;
108
- snapshot = stats + '\n' + snapshot;
113
+ const actMode = !opts.pruneMode || opts.pruneMode === 'act';
114
+ const hint = (actMode && raw.length > 5000 && snapshot.length < 500 && snapshot.length < raw.length * 0.05)
115
+ ? `hint: act mode dropped most of the page — retry with pruneMode='read' for paragraphs and long text\n`
116
+ : '';
117
+ snapshot = stats + '\n' + hint + snapshot;
109
118
 
110
119
  // Step 7: Clean up
111
120
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
@@ -113,7 +122,7 @@ export async function browse(url, opts = {}) {
113
122
  return snapshot;
114
123
  } finally {
115
124
  if (cdp) cdp.close();
116
- if (browser) browser.process.kill();
125
+ await cleanupBrowser(browser);
117
126
  }
118
127
  }
119
128
 
@@ -122,28 +131,54 @@ export async function browse(url, opts = {}) {
122
131
  *
123
132
  * @param {object} [opts]
124
133
  * @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
134
+ * @param {number} [opts.port] - Attach to an already-running Chromium at this
135
+ * CDP port instead of launching a new one. The browser keeps running on
136
+ * close(); only the tab we created is torn down. Use this to drive a
137
+ * user's logged-in session (start Chromium with --remote-debugging-port=N).
138
+ * @param {string} [opts.downloadPath] - Directory to save downloaded files.
139
+ * Default: a per-session subdirectory under the OS temp dir. Downloads
140
+ * land here as <guid>; check `page.downloads` for { url, suggestedFilename,
141
+ * savedPath, state, totalBytes, receivedBytes } per file.
125
142
  * @returns {Promise<object>} Page handle with goto, snapshot, close
126
143
  */
127
144
  export async function connect(opts = {}) {
128
145
  const mode = opts.mode || 'headless';
146
+ const attachMode = !!opts.port;
129
147
  let browser = null;
130
148
  let cdp;
131
-
132
- if (mode === 'headed') {
133
- browser = await launch({ headed: true, proxy: opts.proxy });
149
+ // Forward caller-supplied launch knobs into every launch() below,
150
+ // including hybrid-fallback re-launches inside goto().
151
+ const launchOpts = { proxy: opts.proxy, binary: opts.binary, userDataDir: opts.userDataDir };
152
+
153
+ if (attachMode) {
154
+ // Reuse the user's running browser — do not launch, do not own the
155
+ // profile. cleanupBrowser() is a no-op on this shape (process: null,
156
+ // ownedProfileDir: null), which is the whole point.
157
+ browser = await attach({ port: opts.port });
158
+ cdp = await createCDP(browser.wsUrl);
159
+ } else if (mode === 'headed') {
160
+ browser = await launch({ ...launchOpts, headed: true });
134
161
  cdp = await createCDP(browser.wsUrl);
135
162
  } else {
136
- browser = await launch({ proxy: opts.proxy });
163
+ browser = await launch(launchOpts);
137
164
  cdp = await createCDP(browser.wsUrl);
138
165
  }
139
166
 
140
- let currentlyHeaded = (mode === 'headed');
167
+ // In attach mode we don't know (and shouldn't assume) the user's headed/
168
+ // headless state — treat it as headed so stealth patches are skipped
169
+ // (they'd persist in the user's session via addScriptToEvaluateOnNewDocument)
170
+ // and the headed→headless rewind in goto() is gated off below.
171
+ let currentlyHeaded = attachMode || (mode === 'headed');
141
172
  let page = await createPage(cdp, !currentlyHeaded, { viewport: opts.viewport });
142
173
  let refMap = new Map();
143
174
  let botBlocked = false;
144
175
 
145
- // Suppress permission prompts for all modes
146
- await suppressPermissions(cdp);
176
+ // Suppress permission prompts. Skipped in attach mode — Browser.setPermission
177
+ // is browser-wide (no origin scope here), so flipping permissions to denied
178
+ // would leak into the user's other tabs.
179
+ if (!attachMode) {
180
+ await suppressPermissions(cdp);
181
+ }
147
182
 
148
183
  // Load storage state (cookies + localStorage) from file
149
184
  if (opts.storageState) {
@@ -156,8 +191,72 @@ export async function connect(opts = {}) {
156
191
  } catch { /* file not found or invalid — continue without */ }
157
192
  }
158
193
 
159
- // Auto-dismiss JS dialogs (alert, confirm, prompt)
194
+ // Download tracking — wire Browser.setDownloadBehavior so files actually
195
+ // land on disk (default Chromium would route them to ~/Downloads or
196
+ // nowhere useful in headless), and listen for downloadWillBegin /
197
+ // downloadProgress so callers can read `page.downloads` to know what
198
+ // arrived. In attach mode we don't change the user's running browser's
199
+ // download dir — they almost certainly have an existing preference.
200
+ const downloads = [];
201
+ let ownedDownloadDir = null;
202
+ if (!attachMode) {
203
+ let downloadPath = opts.downloadPath;
204
+ if (!downloadPath) {
205
+ const { mkdtempSync } = await import('node:fs');
206
+ const { tmpdir } = await import('node:os');
207
+ ownedDownloadDir = mkdtempSync(pathJoin(tmpdir(), 'barebrowse-dl-'));
208
+ downloadPath = ownedDownloadDir;
209
+ }
210
+ // Register listeners BEFORE sending setDownloadBehavior so no
211
+ // downloadWillBegin / downloadProgress event can fire into a session
212
+ // without subscribers — about:blank can't initiate a download so the
213
+ // window is microscopic in practice, but ordering it correctly costs
214
+ // nothing.
215
+ cdp.on('Browser.downloadWillBegin', (params) => {
216
+ downloads.push({
217
+ guid: params.guid,
218
+ url: params.url,
219
+ suggestedFilename: params.suggestedFilename,
220
+ savedPath: pathJoin(downloadPath, params.guid),
221
+ state: 'inProgress',
222
+ totalBytes: 0,
223
+ receivedBytes: 0,
224
+ });
225
+ });
226
+ cdp.on('Browser.downloadProgress', (params) => {
227
+ const d = downloads.find((x) => x.guid === params.guid);
228
+ if (!d) return;
229
+ d.state = params.state; // 'inProgress' | 'completed' | 'canceled'
230
+ d.totalBytes = params.totalBytes;
231
+ d.receivedBytes = params.receivedBytes;
232
+ });
233
+ try {
234
+ // 'allowAndName' names saved files by guid for a stable, predictable
235
+ // path; the suggested filename is still surfaced on the download record.
236
+ await cdp.send('Browser.setDownloadBehavior', {
237
+ behavior: 'allowAndName', downloadPath, eventsEnabled: true,
238
+ });
239
+ } catch {
240
+ // Older Chrome may not accept 'allowAndName' — fall back to 'allow'
241
+ // which uses the suggested filename verbatim (no GUID).
242
+ try {
243
+ await cdp.send('Browser.setDownloadBehavior', {
244
+ behavior: 'allow', downloadPath, eventsEnabled: true,
245
+ });
246
+ } catch {
247
+ // Download capture unavailable on this Chrome — downloads still
248
+ // happen, we just can't observe them. page.downloads stays empty.
249
+ }
250
+ }
251
+ }
252
+
253
+ // JS dialog handling (alert, confirm, prompt, beforeunload). Default is
254
+ // auto-accept everything except beforeunload (auto-dismiss). The caller
255
+ // can install a custom decision via page.onDialog(handler) — the handler
256
+ // gets { type, message, defaultPrompt } and may return
257
+ // { accept: bool, promptText: string } to override.
160
258
  const dialogLog = [];
259
+ let onDialogHandler = null;
161
260
  function setupDialogHandler(session) {
162
261
  session.on('Page.javascriptDialogOpening', async (params) => {
163
262
  dialogLog.push({
@@ -165,23 +264,45 @@ export async function connect(opts = {}) {
165
264
  message: params.message,
166
265
  timestamp: new Date().toISOString(),
167
266
  });
168
- await session.send('Page.handleJavaScriptDialog', {
169
- accept: params.type !== 'beforeunload',
170
- promptText: params.defaultPrompt || '',
171
- });
267
+ let accept = params.type !== 'beforeunload';
268
+ let promptText = params.defaultPrompt || '';
269
+ if (onDialogHandler) {
270
+ try {
271
+ const decision = await onDialogHandler({
272
+ type: params.type,
273
+ message: params.message,
274
+ defaultPrompt: params.defaultPrompt || '',
275
+ });
276
+ if (decision && typeof decision === 'object') {
277
+ if (typeof decision.accept === 'boolean') accept = decision.accept;
278
+ if (typeof decision.promptText === 'string') promptText = decision.promptText;
279
+ }
280
+ } catch {
281
+ // Handler threw — fall back to defaults so the page doesn't hang
282
+ // waiting for a never-arriving handleJavaScriptDialog reply.
283
+ }
284
+ }
285
+ await session.send('Page.handleJavaScriptDialog', { accept, promptText });
172
286
  });
173
287
  }
174
288
  setupDialogHandler(page.session);
175
289
 
176
290
  return {
177
291
  async goto(url, timeout = 30000) {
178
- // Switch back to headless if we fell back to headed previously
179
- if (currentlyHeaded && mode === 'hybrid') {
292
+ // Refs from the previous page are about to become invalid — clear
293
+ // before navigating so a stale click(ref) errors clearly instead of
294
+ // silently resolving to whatever backendNodeId happens to still be in
295
+ // the map.
296
+ refMap = new Map();
297
+ // Switch back to headless if we fell back to headed previously.
298
+ // Not in attach mode — we never own the browser there, so there's
299
+ // nothing to rewind.
300
+ if (currentlyHeaded && mode === 'hybrid' && !attachMode) {
180
301
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
181
302
  cdp.close();
182
- if (browser) { browser.process.kill(); browser = null; }
303
+ await cleanupBrowser(browser); browser = null;
183
304
 
184
- browser = await launch({ proxy: opts.proxy });
305
+ browser = await launch(launchOpts);
185
306
  cdp = await createCDP(browser.wsUrl);
186
307
  page = await createPage(cdp, true, { viewport: opts.viewport });
187
308
  setupDialogHandler(page.session);
@@ -198,14 +319,16 @@ export async function connect(opts = {}) {
198
319
  const { tree, nodeCount } = await ariaTree(page);
199
320
  botBlocked = isChallengePage(tree, nodeCount);
200
321
 
201
- // Hybrid fallback: if bot-blocked, retry with headed browser
202
- if (botBlocked && mode === 'hybrid') {
322
+ // Hybrid fallback: if bot-blocked, retry with headed browser.
323
+ // Suppressed in attach mode — we can't tear down the user's running
324
+ // browser and we don't know what mode they started it in.
325
+ if (botBlocked && mode === 'hybrid' && !attachMode) {
203
326
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
204
327
  cdp.close();
205
- if (browser) { browser.process.kill(); browser = null; }
328
+ await cleanupBrowser(browser); browser = null;
206
329
 
207
330
  try {
208
- browser = await launch({ headed: true, proxy: opts.proxy });
331
+ browser = await launch({ ...launchOpts, headed: true });
209
332
  cdp = await createCDP(browser.wsUrl);
210
333
  page = await createPage(cdp, false, { viewport: opts.viewport });
211
334
  setupDialogHandler(page.session);
@@ -226,15 +349,29 @@ export async function connect(opts = {}) {
226
349
  async goBack() {
227
350
  const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
228
351
  if (currentIndex <= 0) throw new Error('No previous page in history');
352
+ const loadPromise = page.session.once('Page.loadEventFired', 30000);
229
353
  await page.session.send('Page.navigateToHistoryEntry', { entryId: entries[currentIndex - 1].id });
230
- await new Promise((r) => setTimeout(r, 500));
354
+ try { await loadPromise; } catch { await new Promise((r) => setTimeout(r, 500)); }
355
+ refMap = new Map(); // refs from the previous page are now invalid
231
356
  },
232
357
 
233
358
  async goForward() {
234
359
  const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
235
360
  if (currentIndex >= entries.length - 1) throw new Error('No next page in history');
361
+ const loadPromise = page.session.once('Page.loadEventFired', 30000);
236
362
  await page.session.send('Page.navigateToHistoryEntry', { entryId: entries[currentIndex + 1].id });
237
- await new Promise((r) => setTimeout(r, 500));
363
+ try { await loadPromise; } catch { await new Promise((r) => setTimeout(r, 500)); }
364
+ refMap = new Map();
365
+ },
366
+
367
+ async reload(reloadOpts = {}) {
368
+ const timeout = reloadOpts.timeout || 30000;
369
+ const loadPromise = page.session.once('Page.loadEventFired', timeout);
370
+ await page.session.send('Page.reload', {
371
+ ignoreCache: !!reloadOpts.ignoreCache,
372
+ });
373
+ try { await loadPromise; } catch { await new Promise((r) => setTimeout(r, 500)); }
374
+ refMap = new Map(); // refs from the pre-reload page are invalid
238
375
  },
239
376
 
240
377
  async injectCookies(url, cookieOpts) {
@@ -249,22 +386,26 @@ export async function connect(opts = {}) {
249
386
  const pageUrl = entries[currentIndex]?.url || '';
250
387
  const warn = botBlocked ? '[BOT CHALLENGE DETECTED — page content may be incomplete or blocked]\n' : '';
251
388
  if (pruneOpts === false) return `url: ${pageUrl}\n` + warn + raw;
252
- const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
389
+ const mode = pruneOpts?.mode || 'act';
390
+ const pruned = pruneTree(result.tree, { mode });
253
391
  const out = formatTree(pruned);
254
392
  const stats = `url: ${pageUrl}\n${raw.length.toLocaleString()} chars → ${out.length.toLocaleString()} chars (${Math.round((1 - out.length / raw.length) * 100)}% pruned)`;
255
- return stats + '\n' + warn + out;
393
+ const hint = (mode === 'act' && raw.length > 5000 && out.length < 500 && out.length < raw.length * 0.05)
394
+ ? `hint: act mode dropped most of the page — retry with pruneMode='read' for paragraphs and long text\n`
395
+ : '';
396
+ return stats + '\n' + hint + warn + out;
256
397
  },
257
398
 
258
399
  async click(ref) {
259
- const backendNodeId = refMap.get(ref);
260
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
261
- await cdpClick(page.session, backendNodeId);
400
+ const entry = refMap.get(ref);
401
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
402
+ await cdpClick(entry.session, entry.backendNodeId);
262
403
  },
263
404
 
264
405
  async type(ref, text, typeOpts) {
265
- const backendNodeId = refMap.get(ref);
266
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
267
- await cdpType(page.session, backendNodeId, text, typeOpts);
406
+ const entry = refMap.get(ref);
407
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
408
+ await cdpType(entry.session, entry.backendNodeId, text, typeOpts);
268
409
  },
269
410
 
270
411
  async scroll(deltaY) {
@@ -276,29 +417,34 @@ export async function connect(opts = {}) {
276
417
  },
277
418
 
278
419
  async hover(ref) {
279
- const backendNodeId = refMap.get(ref);
280
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
281
- await cdpHover(page.session, backendNodeId);
420
+ const entry = refMap.get(ref);
421
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
422
+ await cdpHover(entry.session, entry.backendNodeId);
282
423
  },
283
424
 
284
425
  async select(ref, value) {
285
- const backendNodeId = refMap.get(ref);
286
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
287
- await cdpSelect(page.session, backendNodeId, value);
426
+ const entry = refMap.get(ref);
427
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
428
+ await cdpSelect(entry.session, entry.backendNodeId, value);
288
429
  },
289
430
 
290
431
  async drag(fromRef, toRef) {
291
- const fromId = refMap.get(fromRef);
292
- const toId = refMap.get(toRef);
293
- if (!fromId) throw new Error(`No element found for ref "${fromRef}"`);
294
- if (!toId) throw new Error(`No element found for ref "${toRef}"`);
295
- await cdpDrag(page.session, fromId, toId);
432
+ const from = refMap.get(fromRef);
433
+ const to = refMap.get(toRef);
434
+ if (!from) throw new Error(`No element found for ref "${fromRef}"`);
435
+ if (!to) throw new Error(`No element found for ref "${toRef}"`);
436
+ // Drag across different frames isn't physically meaningful — bail
437
+ // rather than mix sessions and produce nonsense coordinates.
438
+ if (from.session !== to.session) {
439
+ throw new Error('drag() between elements in different frames is not supported');
440
+ }
441
+ await cdpDrag(from.session, from.backendNodeId, to.backendNodeId);
296
442
  },
297
443
 
298
444
  async upload(ref, files) {
299
- const backendNodeId = refMap.get(ref);
300
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
301
- await cdpUpload(page.session, backendNodeId, files);
445
+ const entry = refMap.get(ref);
446
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
447
+ await cdpUpload(entry.session, entry.backendNodeId, files);
302
448
  },
303
449
 
304
450
  async pdf(pdfOpts = {}) {
@@ -320,7 +466,17 @@ export async function connect(opts = {}) {
320
466
  const { targetInfos } = await cdp.send('Target.getTargets');
321
467
  const pages = targetInfos.filter((t) => t.type === 'page');
322
468
  if (index < 0 || index >= pages.length) throw new Error(`Tab index ${index} out of range (0-${pages.length - 1})`);
323
- await cdp.send('Target.activateTarget', { targetId: pages[index].targetId });
469
+ const target = pages[index];
470
+ await cdp.send('Target.activateTarget', { targetId: target.targetId });
471
+ if (target.targetId === page.targetId) return; // already on this tab
472
+ // Detach from old session, attach to new — the page variable is the
473
+ // closure handle used by every method below, so swapping it makes
474
+ // snapshot/click/type/etc. operate on the new tab.
475
+ const oldSessionId = page.sessionId;
476
+ page = await attachToExistingTarget(cdp, target.targetId);
477
+ refMap = new Map(); // refs from the previous tab are no longer valid
478
+ setupDialogHandler(page.session);
479
+ try { await cdp.send('Target.detachFromTarget', { sessionId: oldSessionId }); } catch {}
324
480
  },
325
481
 
326
482
  async waitFor(waitOpts = {}) {
@@ -363,6 +519,18 @@ export async function connect(opts = {}) {
363
519
 
364
520
  dialogLog,
365
521
 
522
+ /**
523
+ * Install a custom JS dialog handler. The handler is called with
524
+ * `{ type, message, defaultPrompt }` and may return (sync or async)
525
+ * `{ accept: bool, promptText: string }` to override the auto-accept
526
+ * default. Pass null to restore the default behavior.
527
+ */
528
+ onDialog(handler) {
529
+ onDialogHandler = handler;
530
+ },
531
+
532
+ downloads,
533
+
366
534
  async screenshot(screenshotOpts = {}) {
367
535
  const format = screenshotOpts.format || 'png';
368
536
  const params = { format };
@@ -389,12 +557,13 @@ export async function connect(opts = {}) {
389
557
  return waitForNetworkIdle(page.session, idleOpts);
390
558
  },
391
559
 
392
- /** Raw CDP session for escape hatch */
393
- cdp: page.session,
560
+ /** Raw CDP session for escape hatch — getter so it survives hybrid fallback / tab swaps */
561
+ get cdp() { return page.session; },
394
562
 
395
563
  async createTab() {
396
564
  const tab = await createPage(cdp, !currentlyHeaded, { viewport: opts.viewport });
397
565
  await suppressPermissions(cdp);
566
+ setupDialogHandler(tab.session);
398
567
  let tabBotBlocked = false;
399
568
  return {
400
569
  async goto(url, timeout = 30000) {
@@ -422,7 +591,15 @@ export async function connect(opts = {}) {
422
591
  async close() {
423
592
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
424
593
  cdp.close();
425
- if (browser) browser.process.kill();
594
+ await cleanupBrowser(browser);
595
+ // If we created the download dir ourselves, clean it up too. Caller-
596
+ // supplied opts.downloadPath stays — the caller owns the lifecycle.
597
+ if (ownedDownloadDir) {
598
+ try {
599
+ const { rmSync } = await import('node:fs');
600
+ rmSync(ownedDownloadDir, { recursive: true, force: true });
601
+ } catch {}
602
+ }
426
603
  },
427
604
  };
428
605
  }
@@ -486,7 +663,69 @@ async function createPage(cdp, stealth = false, pageOpts = {}) {
486
663
  }
487
664
  }
488
665
 
489
- return { session, targetId, sessionId };
666
+ // Track child frame sessions (OOPIF) so ariaTree() can read across frame
667
+ // boundaries. Same-origin iframes don't get their own session and stay
668
+ // queryable via the main session with a frameId param — see ariaTree().
669
+ const framesByFrameId = await attachFrameTracking(cdp, session);
670
+
671
+ return { session, targetId, sessionId, framesByFrameId };
672
+ }
673
+
674
+ /**
675
+ * Wire Target.setAutoAttach on a page session so every OOPIF child target gets
676
+ * its own CDP session, enabled and registered. Returns a live Map<frameId,
677
+ * { session, sessionId, targetId }> that updates as frames attach/detach.
678
+ */
679
+ async function attachFrameTracking(cdp, mainSession) {
680
+ const framesByFrameId = new Map();
681
+
682
+ mainSession.on('Target.attachedToTarget', async (params) => {
683
+ if (params.targetInfo?.type !== 'iframe') return;
684
+ const childSessionId = params.sessionId;
685
+ const childSession = cdp.session(childSessionId);
686
+ // For OOPIF, targetId === frameId — see CDP Target domain docs.
687
+ const frameId = params.targetInfo.targetId;
688
+ framesByFrameId.set(frameId, { session: childSession, sessionId: childSessionId, targetId: frameId });
689
+ // Enable domains on the child so we can read its AX tree.
690
+ // Recursively auto-attach so nested OOPIF iframes also get sessions.
691
+ try { await childSession.send('Page.enable'); } catch {}
692
+ try { await childSession.send('DOM.enable'); } catch {}
693
+ try {
694
+ await childSession.send('Target.setAutoAttach', {
695
+ autoAttach: true, flatten: true, waitForDebuggerOnStart: false,
696
+ });
697
+ } catch {}
698
+ try { await childSession.send('Runtime.runIfWaitingForDebugger'); } catch {}
699
+ });
700
+
701
+ mainSession.on('Target.detachedFromTarget', (params) => {
702
+ for (const [frameId, entry] of framesByFrameId) {
703
+ if (entry.sessionId === params.sessionId) {
704
+ framesByFrameId.delete(frameId);
705
+ return;
706
+ }
707
+ }
708
+ });
709
+
710
+ await mainSession.send('Target.setAutoAttach', {
711
+ autoAttach: true, flatten: true, waitForDebuggerOnStart: false,
712
+ });
713
+
714
+ return framesByFrameId;
715
+ }
716
+
717
+ /**
718
+ * Attach a CDP session to an existing target (e.g. a tab opened by window.open).
719
+ * Enables the same domains as createPage so snapshot/click/type work uniformly.
720
+ */
721
+ async function attachToExistingTarget(cdp, targetId) {
722
+ const { sessionId } = await cdp.send('Target.attachToTarget', { targetId, flatten: true });
723
+ const session = cdp.session(sessionId);
724
+ await session.send('Page.enable');
725
+ await session.send('Network.enable');
726
+ await session.send('DOM.enable');
727
+ const framesByFrameId = await attachFrameTracking(cdp, session);
728
+ return { session, targetId, sessionId, framesByFrameId };
490
729
  }
491
730
 
492
731
  /**
@@ -502,37 +741,111 @@ async function navigate(page, url, timeout = 30000) {
502
741
 
503
742
  /**
504
743
  * Get the ARIA accessibility tree for a page as a nested object.
744
+ *
745
+ * Walks every frame (main + iframes) via Page.getFrameTree, queries each
746
+ * frame's AX tree on the right session (child session for OOPIF, main
747
+ * session with frameId param for same-origin), and splices child frame
748
+ * trees under their iframe placeholders in the parent. Refs are assigned
749
+ * by a flat global counter so click/type/etc can resolve the right session
750
+ * without the agent having to think about frames at all.
505
751
  */
506
752
  async function ariaTree(page) {
507
- await page.session.send('Accessibility.enable');
508
- const { nodes } = await page.session.send('Accessibility.getFullAXTree');
509
- const tree = buildTree(nodes);
510
-
511
- // Build ref → backendDOMNodeId map in one pass over raw CDP nodes
753
+ const main = page.session;
754
+ await main.send('Accessibility.enable');
755
+
756
+ // 1. Linearize the frame tree depth-first: index 0 is the main frame.
757
+ const { frameTree } = await main.send('Page.getFrameTree');
758
+ const frames = [];
759
+ (function walk(node, parentId) {
760
+ frames.push({ frame: node.frame, parentId });
761
+ for (const child of node.childFrames || []) walk(child, node.frame.id);
762
+ })(frameTree, null);
763
+
764
+ // 2. For each frame, fetch its AX nodes and build a tree. refMap value is
765
+ // { session, backendNodeId } so click(ref) routes to the right CDP
766
+ // session (essential for cross-process iframes). refCounter is shared
767
+ // across all frames in one snapshot — refs stay flat integers, so the
768
+ // visible [ref=N] format and existing agent prompts don't change.
512
769
  const refMap = new Map();
513
- for (const node of nodes) {
514
- if (node.backendDOMNodeId) {
515
- refMap.set(node.nodeId, node.backendDOMNodeId);
770
+ const treesByFrameId = new Map();
771
+ const sessionByFrameId = new Map();
772
+ const refCounter = { value: 1 };
773
+ let totalNodes = 0;
774
+
775
+ for (let i = 0; i < frames.length; i++) {
776
+ const { frame } = frames[i];
777
+ const childEntry = page.framesByFrameId?.get(frame.id);
778
+ const frameSession = childEntry ? childEntry.session : main;
779
+ sessionByFrameId.set(frame.id, frameSession);
780
+
781
+ let nodes = [];
782
+ try {
783
+ if (childEntry) {
784
+ // OOPIF — use the child session, no frameId param needed.
785
+ try { await frameSession.send('Accessibility.enable'); } catch {}
786
+ const res = await frameSession.send('Accessibility.getFullAXTree');
787
+ nodes = res.nodes;
788
+ } else {
789
+ // Main frame or same-origin child — query main session, scoping by
790
+ // frameId for children (Accessibility.getFullAXTree without frameId
791
+ // would just return the top frame, dropping same-origin iframe content).
792
+ const params = i === 0 ? {} : { frameId: frame.id };
793
+ const res = await main.send('Accessibility.getFullAXTree', params);
794
+ nodes = res.nodes;
795
+ }
796
+ } catch {
797
+ // Frame may have navigated mid-snapshot — skip it rather than fail
798
+ // the whole snapshot. The placeholder iframe node will simply have
799
+ // no children in the merged tree.
800
+ continue;
801
+ }
802
+
803
+ totalNodes += nodes.length;
804
+ const tree = buildTree(nodes, frameSession, refMap, refCounter);
805
+ if (tree) treesByFrameId.set(frame.id, tree);
806
+ }
807
+
808
+ // 3. Splice each child frame's tree under its iframe placeholder node in
809
+ // the parent. DOM.getFrameOwner gives the iframe element's
810
+ // backendNodeId in the parent's view; we match it against AX nodes.
811
+ for (const { frame, parentId } of frames) {
812
+ if (parentId === null) continue;
813
+ const parentTree = treesByFrameId.get(parentId);
814
+ const childTree = treesByFrameId.get(frame.id);
815
+ if (!parentTree || !childTree) continue;
816
+ const parentSession = sessionByFrameId.get(parentId);
817
+ try {
818
+ const { backendNodeId } = await parentSession.send('DOM.getFrameOwner', { frameId: frame.id });
819
+ const placeholder = findNodeByBackend(parentTree, backendNodeId);
820
+ if (placeholder) placeholder.children = [childTree];
821
+ } catch {
822
+ // Frame owner lookup failed — leave the iframe placeholder as-is.
516
823
  }
517
824
  }
518
825
 
519
- return { tree, refMap, nodeCount: nodes.length };
826
+ const root = treesByFrameId.get(frames[0].frame.id) || null;
827
+ return { tree: root, refMap, nodeCount: totalNodes };
520
828
  }
521
829
 
522
830
  /**
523
- * Transform CDP's flat AXNode array into a nested tree.
831
+ * Transform CDP's flat AXNode array into a nested tree. Every tree node gets
832
+ * a globally unique flat ref string from `refCounter` (shared across all
833
+ * frames in one snapshot), and refMap is populated with ref → { session,
834
+ * backendNodeId } so click/type can route to the right CDP session even when
835
+ * the element lives in an iframe.
524
836
  * CDP nodes have parentId — we use that exclusively to avoid double-linking.
525
837
  */
526
- function buildTree(nodes) {
838
+ function buildTree(nodes, session, refMap, refCounter) {
527
839
  if (!nodes || nodes.length === 0) return null;
528
840
 
529
841
  const nodeMap = new Map();
530
- const linked = new Set(); // track which nodes have been linked to a parent
842
+ const linked = new Set();
531
843
 
532
- // First pass: create tree nodes
844
+ // First pass: create tree nodes + populate refMap with flat global refs
533
845
  for (const node of nodes) {
846
+ const ref = String(refCounter.value++);
534
847
  nodeMap.set(node.nodeId, {
535
- nodeId: node.nodeId,
848
+ nodeId: ref,
536
849
  backendDOMNodeId: node.backendDOMNodeId,
537
850
  role: node.role?.value || '',
538
851
  name: node.name?.value || '',
@@ -540,6 +853,9 @@ function buildTree(nodes) {
540
853
  ignored: node.ignored || false,
541
854
  children: [],
542
855
  });
856
+ if (node.backendDOMNodeId && refMap) {
857
+ refMap.set(ref, { session, backendNodeId: node.backendDOMNodeId });
858
+ }
543
859
  }
544
860
 
545
861
  // Second pass: link via parentId only (avoids duplicates from childIds)
@@ -560,6 +876,16 @@ function buildTree(nodes) {
560
876
  return root;
561
877
  }
562
878
 
879
+ function findNodeByBackend(node, backendNodeId) {
880
+ if (!node) return null;
881
+ if (node.backendDOMNodeId === backendNodeId) return node;
882
+ for (const child of node.children || []) {
883
+ const found = findNodeByBackend(child, backendNodeId);
884
+ if (found) return found;
885
+ }
886
+ return null;
887
+ }
888
+
563
889
  function extractProps(props) {
564
890
  if (!props) return {};
565
891
  const result = {};
@@ -568,79 +894,58 @@ function extractProps(props) {
568
894
  }
569
895
 
570
896
  /**
571
- * Wait until no network requests are pending for `idle` ms.
572
- * @param {object} session - Session-scoped CDP handle
573
- * @param {object} [opts]
574
- * @param {number} [opts.timeout=30000] - Max wait time
575
- * @param {number} [opts.idle=500] - Idle threshold in ms
576
- */
577
- function waitForNetworkIdle(session, opts = {}) {
578
- const timeout = opts.timeout || 30000;
579
- const idle = opts.idle || 500;
580
-
581
- return new Promise((resolve, reject) => {
582
- let pending = 0;
583
- let timer = null;
584
- const unsubs = [];
585
-
586
- const done = () => {
587
- clearTimeout(timer);
588
- clearTimeout(deadlineTimer);
589
- for (const unsub of unsubs) unsub();
590
- resolve();
591
- };
592
-
593
- const check = () => {
594
- clearTimeout(timer);
595
- if (pending <= 0) {
596
- pending = 0;
597
- timer = setTimeout(done, idle);
598
- }
599
- };
600
-
601
- unsubs.push(session.on('Network.requestWillBeSent', () => { pending++; clearTimeout(timer); }));
602
- unsubs.push(session.on('Network.loadingFinished', () => { pending--; check(); }));
603
- unsubs.push(session.on('Network.loadingFailed', () => { pending--; check(); }));
604
-
605
- const deadlineTimer = setTimeout(() => {
606
- for (const unsub of unsubs) unsub();
607
- reject(new Error(`waitForNetworkIdle timed out after ${timeout}ms`));
608
- }, timeout);
609
-
610
- // Start check immediately (might already be idle)
611
- check();
612
- });
613
- }
614
-
615
- /**
616
- * Detect if a page is a bot-challenge page (Cloudflare, etc.).
617
- * Heuristic: low ARIA node count, short text, or known challenge phrases.
897
+ * Detect if a page is a bot-challenge page (Cloudflare, hCaptcha, etc.).
898
+ *
899
+ * Pre-H9 this was over-aggressive: `nodeCount < 50` alone fired on any
900
+ * legitimate small page (404s, simple landings, error pages), and generic
901
+ * phrases like "access denied" / "unknown error" / "permission denied"
902
+ * triggered on real HTTP 4xx/5xx pages, kicking hybrid mode into a costly
903
+ * headed fallback for nothing.
904
+ *
905
+ * H9 split: STRONG_PHRASES are essentially-unambiguous challenge UI and
906
+ * fire regardless of page size; WEAK_PHRASES only fire when the page is
907
+ * ALSO tiny (so a legitimate-looking error page with "access denied" in
908
+ * its body doesn't trip the fallback).
909
+ *
618
910
  * @param {object} tree - Nested ARIA tree (from buildTree)
619
911
  * @param {number} [nodeCount] - Raw CDP node count (from Accessibility.getFullAXTree)
620
912
  */
621
- function isChallengePage(tree, nodeCount) {
622
- if (!tree) return true;
623
- // Real pages have 50+ ARIA nodes. Bot challenges have <20.
624
- if (nodeCount !== undefined && nodeCount < 50) return true;
913
+ export function isChallengePage(tree, nodeCount) {
914
+ if (!tree) return true; // truly empty AX tree — something went wrong fetching the page
915
+
625
916
  const text = flattenTreeText(tree);
626
- // Near-empty pages are almost certainly blocks
627
- if (text.trim().length < 50) return true;
628
- const challengePhrases = [
629
- 'just a moment',
630
- 'checking if the site connection is secure',
631
- 'checking your browser',
632
- 'please wait',
633
- 'verify you are human',
917
+ const lower = text.toLowerCase();
918
+
919
+ // Strong phrases — distinctive enough to identify the challenge product
920
+ // by name. Fire on their own regardless of node count.
921
+ const STRONG_PHRASES = [
922
+ 'just a moment', // Cloudflare interstitial
923
+ 'checking if the site connection is secure', // Cloudflare
924
+ 'checking your browser', // Various JS challenges
925
+ 'verify you are human', // hCaptcha / reCAPTCHA
634
926
  'prove your humanity',
635
- 'attention required',
636
- 'file a ticket',
637
- 'unknown error',
927
+ 'attention required', // Cloudflare block page
928
+ 'enable javascript and cookies to continue', // Cloudflare
929
+ 'please complete the security check', // Cloudflare/Akamai
930
+ ];
931
+ if (STRONG_PHRASES.some((p) => lower.includes(p))) return true;
932
+
933
+ // Weak phrases — show up on real challenge pages but ALSO on legitimate
934
+ // small error pages. Only count when the page is itself tiny (low node
935
+ // count or near-empty text), which is the corroborating signal that
936
+ // separates a real error UI from a challenge skeleton.
937
+ const WEAK_PHRASES = [
938
+ 'please wait',
939
+ 'request blocked',
638
940
  'access denied',
639
941
  'permission denied',
640
- 'request blocked',
942
+ 'unknown error',
943
+ 'file a ticket',
641
944
  ];
642
- const lower = text.toLowerCase();
643
- return challengePhrases.some((p) => lower.includes(p));
945
+ const tinyPage = (nodeCount !== undefined && nodeCount < 30) || text.trim().length < 50;
946
+ if (tinyPage && WEAK_PHRASES.some((p) => lower.includes(p))) return true;
947
+
948
+ return false;
644
949
  }
645
950
 
646
951
  function flattenTreeText(node) {