barebrowse 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -8,7 +8,7 @@
8
8
  * const snapshot = await browse('https://example.com');
9
9
  */
10
10
 
11
- import { launch } from './chromium.js';
11
+ import { launch, attach, cleanupBrowser } from './chromium.js';
12
12
  import { createCDP } from './cdp.js';
13
13
  import { formatTree } from './aria.js';
14
14
  import { authenticate } from './auth.js';
@@ -16,6 +16,8 @@ import { prune as pruneTree } from './prune.js';
16
16
  import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress, hover as cdpHover, select as cdpSelect, drag as cdpDrag, upload as cdpUpload } from './interact.js';
17
17
  import { dismissConsent } from './consent.js';
18
18
  import { applyStealth } from './stealth.js';
19
+ import { waitForNetworkIdle } from './network-idle.js';
20
+ import { join as pathJoin } from 'node:path';
19
21
 
20
22
  /**
21
23
  * Browse a URL and return an ARIA snapshot.
@@ -35,15 +37,18 @@ export async function browse(url, opts = {}) {
35
37
 
36
38
  let browser = null;
37
39
  let cdp = null;
40
+ // Forward caller-supplied launch knobs (binary, userDataDir, proxy) into
41
+ // every launch() call below, including hybrid-fallback re-launches.
42
+ const launchOpts = { proxy: opts.proxy, binary: opts.binary, userDataDir: opts.userDataDir };
38
43
 
39
44
  try {
40
45
  // Step 1: Get a CDP connection
41
46
  if (mode === 'headed') {
42
- browser = await launch({ headed: true, proxy: opts.proxy });
47
+ browser = await launch({ ...launchOpts, headed: true });
43
48
  cdp = await createCDP(browser.wsUrl);
44
49
  } else {
45
50
  // headless or hybrid (start headless)
46
- browser = await launch({ proxy: opts.proxy });
51
+ browser = await launch(launchOpts);
47
52
  cdp = await createCDP(browser.wsUrl);
48
53
  }
49
54
 
@@ -77,10 +82,10 @@ export async function browse(url, opts = {}) {
77
82
  if (mode === 'hybrid' && isChallengePage(tree, nodeCount)) {
78
83
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
79
84
  cdp.close();
80
- if (browser) { browser.process.kill(); browser = null; }
85
+ await cleanupBrowser(browser); browser = null;
81
86
 
82
87
  try {
83
- browser = await launch({ headed: true, proxy: opts.proxy });
88
+ browser = await launch({ ...launchOpts, headed: true });
84
89
  cdp = await createCDP(browser.wsUrl);
85
90
  page = await createPage(cdp, false, { viewport: opts.viewport });
86
91
  await suppressPermissions(cdp);
@@ -113,7 +118,7 @@ export async function browse(url, opts = {}) {
113
118
  return snapshot;
114
119
  } finally {
115
120
  if (cdp) cdp.close();
116
- if (browser) browser.process.kill();
121
+ await cleanupBrowser(browser);
117
122
  }
118
123
  }
119
124
 
@@ -122,28 +127,54 @@ export async function browse(url, opts = {}) {
122
127
  *
123
128
  * @param {object} [opts]
124
129
  * @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
130
+ * @param {number} [opts.port] - Attach to an already-running Chromium at this
131
+ * CDP port instead of launching a new one. The browser keeps running on
132
+ * close(); only the tab we created is torn down. Use this to drive a
133
+ * user's logged-in session (start Chromium with --remote-debugging-port=N).
134
+ * @param {string} [opts.downloadPath] - Directory to save downloaded files.
135
+ * Default: a per-session subdirectory under the OS temp dir. Downloads
136
+ * land here as <guid>; check `page.downloads` for { url, suggestedFilename,
137
+ * savedPath, state, totalBytes, receivedBytes } per file.
125
138
  * @returns {Promise<object>} Page handle with goto, snapshot, close
126
139
  */
127
140
  export async function connect(opts = {}) {
128
141
  const mode = opts.mode || 'headless';
142
+ const attachMode = !!opts.port;
129
143
  let browser = null;
130
144
  let cdp;
131
-
132
- if (mode === 'headed') {
133
- browser = await launch({ headed: true, proxy: opts.proxy });
145
+ // Forward caller-supplied launch knobs into every launch() below,
146
+ // including hybrid-fallback re-launches inside goto().
147
+ const launchOpts = { proxy: opts.proxy, binary: opts.binary, userDataDir: opts.userDataDir };
148
+
149
+ if (attachMode) {
150
+ // Reuse the user's running browser — do not launch, do not own the
151
+ // profile. cleanupBrowser() is a no-op on this shape (process: null,
152
+ // ownedProfileDir: null), which is the whole point.
153
+ browser = await attach({ port: opts.port });
154
+ cdp = await createCDP(browser.wsUrl);
155
+ } else if (mode === 'headed') {
156
+ browser = await launch({ ...launchOpts, headed: true });
134
157
  cdp = await createCDP(browser.wsUrl);
135
158
  } else {
136
- browser = await launch({ proxy: opts.proxy });
159
+ browser = await launch(launchOpts);
137
160
  cdp = await createCDP(browser.wsUrl);
138
161
  }
139
162
 
140
- let currentlyHeaded = (mode === 'headed');
163
+ // In attach mode we don't know (and shouldn't assume) the user's headed/
164
+ // headless state — treat it as headed so stealth patches are skipped
165
+ // (they'd persist in the user's session via addScriptToEvaluateOnNewDocument)
166
+ // and the headed→headless rewind in goto() is gated off below.
167
+ let currentlyHeaded = attachMode || (mode === 'headed');
141
168
  let page = await createPage(cdp, !currentlyHeaded, { viewport: opts.viewport });
142
169
  let refMap = new Map();
143
170
  let botBlocked = false;
144
171
 
145
- // Suppress permission prompts for all modes
146
- await suppressPermissions(cdp);
172
+ // Suppress permission prompts. Skipped in attach mode — Browser.setPermission
173
+ // is browser-wide (no origin scope here), so flipping permissions to denied
174
+ // would leak into the user's other tabs.
175
+ if (!attachMode) {
176
+ await suppressPermissions(cdp);
177
+ }
147
178
 
148
179
  // Load storage state (cookies + localStorage) from file
149
180
  if (opts.storageState) {
@@ -156,8 +187,72 @@ export async function connect(opts = {}) {
156
187
  } catch { /* file not found or invalid — continue without */ }
157
188
  }
158
189
 
159
- // Auto-dismiss JS dialogs (alert, confirm, prompt)
190
+ // Download tracking — wire Browser.setDownloadBehavior so files actually
191
+ // land on disk (default Chromium would route them to ~/Downloads or
192
+ // nowhere useful in headless), and listen for downloadWillBegin /
193
+ // downloadProgress so callers can read `page.downloads` to know what
194
+ // arrived. In attach mode we don't change the user's running browser's
195
+ // download dir — they almost certainly have an existing preference.
196
+ const downloads = [];
197
+ let ownedDownloadDir = null;
198
+ if (!attachMode) {
199
+ let downloadPath = opts.downloadPath;
200
+ if (!downloadPath) {
201
+ const { mkdtempSync } = await import('node:fs');
202
+ const { tmpdir } = await import('node:os');
203
+ ownedDownloadDir = mkdtempSync(pathJoin(tmpdir(), 'barebrowse-dl-'));
204
+ downloadPath = ownedDownloadDir;
205
+ }
206
+ // Register listeners BEFORE sending setDownloadBehavior so no
207
+ // downloadWillBegin / downloadProgress event can fire into a session
208
+ // without subscribers — about:blank can't initiate a download so the
209
+ // window is microscopic in practice, but ordering it correctly costs
210
+ // nothing.
211
+ cdp.on('Browser.downloadWillBegin', (params) => {
212
+ downloads.push({
213
+ guid: params.guid,
214
+ url: params.url,
215
+ suggestedFilename: params.suggestedFilename,
216
+ savedPath: pathJoin(downloadPath, params.guid),
217
+ state: 'inProgress',
218
+ totalBytes: 0,
219
+ receivedBytes: 0,
220
+ });
221
+ });
222
+ cdp.on('Browser.downloadProgress', (params) => {
223
+ const d = downloads.find((x) => x.guid === params.guid);
224
+ if (!d) return;
225
+ d.state = params.state; // 'inProgress' | 'completed' | 'canceled'
226
+ d.totalBytes = params.totalBytes;
227
+ d.receivedBytes = params.receivedBytes;
228
+ });
229
+ try {
230
+ // 'allowAndName' names saved files by guid for a stable, predictable
231
+ // path; the suggested filename is still surfaced on the download record.
232
+ await cdp.send('Browser.setDownloadBehavior', {
233
+ behavior: 'allowAndName', downloadPath, eventsEnabled: true,
234
+ });
235
+ } catch {
236
+ // Older Chrome may not accept 'allowAndName' — fall back to 'allow'
237
+ // which uses the suggested filename verbatim (no GUID).
238
+ try {
239
+ await cdp.send('Browser.setDownloadBehavior', {
240
+ behavior: 'allow', downloadPath, eventsEnabled: true,
241
+ });
242
+ } catch {
243
+ // Download capture unavailable on this Chrome — downloads still
244
+ // happen, we just can't observe them. page.downloads stays empty.
245
+ }
246
+ }
247
+ }
248
+
249
+ // JS dialog handling (alert, confirm, prompt, beforeunload). Default is
250
+ // auto-accept everything except beforeunload (auto-dismiss). The caller
251
+ // can install a custom decision via page.onDialog(handler) — the handler
252
+ // gets { type, message, defaultPrompt } and may return
253
+ // { accept: bool, promptText: string } to override.
160
254
  const dialogLog = [];
255
+ let onDialogHandler = null;
161
256
  function setupDialogHandler(session) {
162
257
  session.on('Page.javascriptDialogOpening', async (params) => {
163
258
  dialogLog.push({
@@ -165,23 +260,45 @@ export async function connect(opts = {}) {
165
260
  message: params.message,
166
261
  timestamp: new Date().toISOString(),
167
262
  });
168
- await session.send('Page.handleJavaScriptDialog', {
169
- accept: params.type !== 'beforeunload',
170
- promptText: params.defaultPrompt || '',
171
- });
263
+ let accept = params.type !== 'beforeunload';
264
+ let promptText = params.defaultPrompt || '';
265
+ if (onDialogHandler) {
266
+ try {
267
+ const decision = await onDialogHandler({
268
+ type: params.type,
269
+ message: params.message,
270
+ defaultPrompt: params.defaultPrompt || '',
271
+ });
272
+ if (decision && typeof decision === 'object') {
273
+ if (typeof decision.accept === 'boolean') accept = decision.accept;
274
+ if (typeof decision.promptText === 'string') promptText = decision.promptText;
275
+ }
276
+ } catch {
277
+ // Handler threw — fall back to defaults so the page doesn't hang
278
+ // waiting for a never-arriving handleJavaScriptDialog reply.
279
+ }
280
+ }
281
+ await session.send('Page.handleJavaScriptDialog', { accept, promptText });
172
282
  });
173
283
  }
174
284
  setupDialogHandler(page.session);
175
285
 
176
286
  return {
177
287
  async goto(url, timeout = 30000) {
178
- // Switch back to headless if we fell back to headed previously
179
- if (currentlyHeaded && mode === 'hybrid') {
288
+ // Refs from the previous page are about to become invalid — clear
289
+ // before navigating so a stale click(ref) errors clearly instead of
290
+ // silently resolving to whatever backendNodeId happens to still be in
291
+ // the map.
292
+ refMap = new Map();
293
+ // Switch back to headless if we fell back to headed previously.
294
+ // Not in attach mode — we never own the browser there, so there's
295
+ // nothing to rewind.
296
+ if (currentlyHeaded && mode === 'hybrid' && !attachMode) {
180
297
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
181
298
  cdp.close();
182
- if (browser) { browser.process.kill(); browser = null; }
299
+ await cleanupBrowser(browser); browser = null;
183
300
 
184
- browser = await launch({ proxy: opts.proxy });
301
+ browser = await launch(launchOpts);
185
302
  cdp = await createCDP(browser.wsUrl);
186
303
  page = await createPage(cdp, true, { viewport: opts.viewport });
187
304
  setupDialogHandler(page.session);
@@ -198,14 +315,16 @@ export async function connect(opts = {}) {
198
315
  const { tree, nodeCount } = await ariaTree(page);
199
316
  botBlocked = isChallengePage(tree, nodeCount);
200
317
 
201
- // Hybrid fallback: if bot-blocked, retry with headed browser
202
- if (botBlocked && mode === 'hybrid') {
318
+ // Hybrid fallback: if bot-blocked, retry with headed browser.
319
+ // Suppressed in attach mode — we can't tear down the user's running
320
+ // browser and we don't know what mode they started it in.
321
+ if (botBlocked && mode === 'hybrid' && !attachMode) {
203
322
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
204
323
  cdp.close();
205
- if (browser) { browser.process.kill(); browser = null; }
324
+ await cleanupBrowser(browser); browser = null;
206
325
 
207
326
  try {
208
- browser = await launch({ headed: true, proxy: opts.proxy });
327
+ browser = await launch({ ...launchOpts, headed: true });
209
328
  cdp = await createCDP(browser.wsUrl);
210
329
  page = await createPage(cdp, false, { viewport: opts.viewport });
211
330
  setupDialogHandler(page.session);
@@ -226,15 +345,29 @@ export async function connect(opts = {}) {
226
345
  async goBack() {
227
346
  const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
228
347
  if (currentIndex <= 0) throw new Error('No previous page in history');
348
+ const loadPromise = page.session.once('Page.loadEventFired', 30000);
229
349
  await page.session.send('Page.navigateToHistoryEntry', { entryId: entries[currentIndex - 1].id });
230
- await new Promise((r) => setTimeout(r, 500));
350
+ try { await loadPromise; } catch { await new Promise((r) => setTimeout(r, 500)); }
351
+ refMap = new Map(); // refs from the previous page are now invalid
231
352
  },
232
353
 
233
354
  async goForward() {
234
355
  const { currentIndex, entries } = await page.session.send('Page.getNavigationHistory');
235
356
  if (currentIndex >= entries.length - 1) throw new Error('No next page in history');
357
+ const loadPromise = page.session.once('Page.loadEventFired', 30000);
236
358
  await page.session.send('Page.navigateToHistoryEntry', { entryId: entries[currentIndex + 1].id });
237
- await new Promise((r) => setTimeout(r, 500));
359
+ try { await loadPromise; } catch { await new Promise((r) => setTimeout(r, 500)); }
360
+ refMap = new Map();
361
+ },
362
+
363
+ async reload(reloadOpts = {}) {
364
+ const timeout = reloadOpts.timeout || 30000;
365
+ const loadPromise = page.session.once('Page.loadEventFired', timeout);
366
+ await page.session.send('Page.reload', {
367
+ ignoreCache: !!reloadOpts.ignoreCache,
368
+ });
369
+ try { await loadPromise; } catch { await new Promise((r) => setTimeout(r, 500)); }
370
+ refMap = new Map(); // refs from the pre-reload page are invalid
238
371
  },
239
372
 
240
373
  async injectCookies(url, cookieOpts) {
@@ -256,15 +389,15 @@ export async function connect(opts = {}) {
256
389
  },
257
390
 
258
391
  async click(ref) {
259
- const backendNodeId = refMap.get(ref);
260
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
261
- await cdpClick(page.session, backendNodeId);
392
+ const entry = refMap.get(ref);
393
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
394
+ await cdpClick(entry.session, entry.backendNodeId);
262
395
  },
263
396
 
264
397
  async type(ref, text, typeOpts) {
265
- const backendNodeId = refMap.get(ref);
266
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
267
- await cdpType(page.session, backendNodeId, text, typeOpts);
398
+ const entry = refMap.get(ref);
399
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
400
+ await cdpType(entry.session, entry.backendNodeId, text, typeOpts);
268
401
  },
269
402
 
270
403
  async scroll(deltaY) {
@@ -276,29 +409,34 @@ export async function connect(opts = {}) {
276
409
  },
277
410
 
278
411
  async hover(ref) {
279
- const backendNodeId = refMap.get(ref);
280
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
281
- await cdpHover(page.session, backendNodeId);
412
+ const entry = refMap.get(ref);
413
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
414
+ await cdpHover(entry.session, entry.backendNodeId);
282
415
  },
283
416
 
284
417
  async select(ref, value) {
285
- const backendNodeId = refMap.get(ref);
286
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
287
- await cdpSelect(page.session, backendNodeId, value);
418
+ const entry = refMap.get(ref);
419
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
420
+ await cdpSelect(entry.session, entry.backendNodeId, value);
288
421
  },
289
422
 
290
423
  async drag(fromRef, toRef) {
291
- const fromId = refMap.get(fromRef);
292
- const toId = refMap.get(toRef);
293
- if (!fromId) throw new Error(`No element found for ref "${fromRef}"`);
294
- if (!toId) throw new Error(`No element found for ref "${toRef}"`);
295
- await cdpDrag(page.session, fromId, toId);
424
+ const from = refMap.get(fromRef);
425
+ const to = refMap.get(toRef);
426
+ if (!from) throw new Error(`No element found for ref "${fromRef}"`);
427
+ if (!to) throw new Error(`No element found for ref "${toRef}"`);
428
+ // Drag across different frames isn't physically meaningful — bail
429
+ // rather than mix sessions and produce nonsense coordinates.
430
+ if (from.session !== to.session) {
431
+ throw new Error('drag() between elements in different frames is not supported');
432
+ }
433
+ await cdpDrag(from.session, from.backendNodeId, to.backendNodeId);
296
434
  },
297
435
 
298
436
  async upload(ref, files) {
299
- const backendNodeId = refMap.get(ref);
300
- if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
301
- await cdpUpload(page.session, backendNodeId, files);
437
+ const entry = refMap.get(ref);
438
+ if (!entry) throw new Error(`No element found for ref "${ref}"`);
439
+ await cdpUpload(entry.session, entry.backendNodeId, files);
302
440
  },
303
441
 
304
442
  async pdf(pdfOpts = {}) {
@@ -320,7 +458,17 @@ export async function connect(opts = {}) {
320
458
  const { targetInfos } = await cdp.send('Target.getTargets');
321
459
  const pages = targetInfos.filter((t) => t.type === 'page');
322
460
  if (index < 0 || index >= pages.length) throw new Error(`Tab index ${index} out of range (0-${pages.length - 1})`);
323
- await cdp.send('Target.activateTarget', { targetId: pages[index].targetId });
461
+ const target = pages[index];
462
+ await cdp.send('Target.activateTarget', { targetId: target.targetId });
463
+ if (target.targetId === page.targetId) return; // already on this tab
464
+ // Detach from old session, attach to new — the page variable is the
465
+ // closure handle used by every method below, so swapping it makes
466
+ // snapshot/click/type/etc. operate on the new tab.
467
+ const oldSessionId = page.sessionId;
468
+ page = await attachToExistingTarget(cdp, target.targetId);
469
+ refMap = new Map(); // refs from the previous tab are no longer valid
470
+ setupDialogHandler(page.session);
471
+ try { await cdp.send('Target.detachFromTarget', { sessionId: oldSessionId }); } catch {}
324
472
  },
325
473
 
326
474
  async waitFor(waitOpts = {}) {
@@ -363,6 +511,18 @@ export async function connect(opts = {}) {
363
511
 
364
512
  dialogLog,
365
513
 
514
+ /**
515
+ * Install a custom JS dialog handler. The handler is called with
516
+ * `{ type, message, defaultPrompt }` and may return (sync or async)
517
+ * `{ accept: bool, promptText: string }` to override the auto-accept
518
+ * default. Pass null to restore the default behavior.
519
+ */
520
+ onDialog(handler) {
521
+ onDialogHandler = handler;
522
+ },
523
+
524
+ downloads,
525
+
366
526
  async screenshot(screenshotOpts = {}) {
367
527
  const format = screenshotOpts.format || 'png';
368
528
  const params = { format };
@@ -389,12 +549,13 @@ export async function connect(opts = {}) {
389
549
  return waitForNetworkIdle(page.session, idleOpts);
390
550
  },
391
551
 
392
- /** Raw CDP session for escape hatch */
393
- cdp: page.session,
552
+ /** Raw CDP session for escape hatch — getter so it survives hybrid fallback / tab swaps */
553
+ get cdp() { return page.session; },
394
554
 
395
555
  async createTab() {
396
556
  const tab = await createPage(cdp, !currentlyHeaded, { viewport: opts.viewport });
397
557
  await suppressPermissions(cdp);
558
+ setupDialogHandler(tab.session);
398
559
  let tabBotBlocked = false;
399
560
  return {
400
561
  async goto(url, timeout = 30000) {
@@ -422,7 +583,15 @@ export async function connect(opts = {}) {
422
583
  async close() {
423
584
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
424
585
  cdp.close();
425
- if (browser) browser.process.kill();
586
+ await cleanupBrowser(browser);
587
+ // If we created the download dir ourselves, clean it up too. Caller-
588
+ // supplied opts.downloadPath stays — the caller owns the lifecycle.
589
+ if (ownedDownloadDir) {
590
+ try {
591
+ const { rmSync } = await import('node:fs');
592
+ rmSync(ownedDownloadDir, { recursive: true, force: true });
593
+ } catch {}
594
+ }
426
595
  },
427
596
  };
428
597
  }
@@ -486,7 +655,69 @@ async function createPage(cdp, stealth = false, pageOpts = {}) {
486
655
  }
487
656
  }
488
657
 
489
- return { session, targetId, sessionId };
658
+ // Track child frame sessions (OOPIF) so ariaTree() can read across frame
659
+ // boundaries. Same-origin iframes don't get their own session and stay
660
+ // queryable via the main session with a frameId param — see ariaTree().
661
+ const framesByFrameId = await attachFrameTracking(cdp, session);
662
+
663
+ return { session, targetId, sessionId, framesByFrameId };
664
+ }
665
+
666
+ /**
667
+ * Wire Target.setAutoAttach on a page session so every OOPIF child target gets
668
+ * its own CDP session, enabled and registered. Returns a live Map<frameId,
669
+ * { session, sessionId, targetId }> that updates as frames attach/detach.
670
+ */
671
+ async function attachFrameTracking(cdp, mainSession) {
672
+ const framesByFrameId = new Map();
673
+
674
+ mainSession.on('Target.attachedToTarget', async (params) => {
675
+ if (params.targetInfo?.type !== 'iframe') return;
676
+ const childSessionId = params.sessionId;
677
+ const childSession = cdp.session(childSessionId);
678
+ // For OOPIF, targetId === frameId — see CDP Target domain docs.
679
+ const frameId = params.targetInfo.targetId;
680
+ framesByFrameId.set(frameId, { session: childSession, sessionId: childSessionId, targetId: frameId });
681
+ // Enable domains on the child so we can read its AX tree.
682
+ // Recursively auto-attach so nested OOPIF iframes also get sessions.
683
+ try { await childSession.send('Page.enable'); } catch {}
684
+ try { await childSession.send('DOM.enable'); } catch {}
685
+ try {
686
+ await childSession.send('Target.setAutoAttach', {
687
+ autoAttach: true, flatten: true, waitForDebuggerOnStart: false,
688
+ });
689
+ } catch {}
690
+ try { await childSession.send('Runtime.runIfWaitingForDebugger'); } catch {}
691
+ });
692
+
693
+ mainSession.on('Target.detachedFromTarget', (params) => {
694
+ for (const [frameId, entry] of framesByFrameId) {
695
+ if (entry.sessionId === params.sessionId) {
696
+ framesByFrameId.delete(frameId);
697
+ return;
698
+ }
699
+ }
700
+ });
701
+
702
+ await mainSession.send('Target.setAutoAttach', {
703
+ autoAttach: true, flatten: true, waitForDebuggerOnStart: false,
704
+ });
705
+
706
+ return framesByFrameId;
707
+ }
708
+
709
+ /**
710
+ * Attach a CDP session to an existing target (e.g. a tab opened by window.open).
711
+ * Enables the same domains as createPage so snapshot/click/type work uniformly.
712
+ */
713
+ async function attachToExistingTarget(cdp, targetId) {
714
+ const { sessionId } = await cdp.send('Target.attachToTarget', { targetId, flatten: true });
715
+ const session = cdp.session(sessionId);
716
+ await session.send('Page.enable');
717
+ await session.send('Network.enable');
718
+ await session.send('DOM.enable');
719
+ const framesByFrameId = await attachFrameTracking(cdp, session);
720
+ return { session, targetId, sessionId, framesByFrameId };
490
721
  }
491
722
 
492
723
  /**
@@ -502,37 +733,111 @@ async function navigate(page, url, timeout = 30000) {
502
733
 
503
734
  /**
504
735
  * Get the ARIA accessibility tree for a page as a nested object.
736
+ *
737
+ * Walks every frame (main + iframes) via Page.getFrameTree, queries each
738
+ * frame's AX tree on the right session (child session for OOPIF, main
739
+ * session with frameId param for same-origin), and splices child frame
740
+ * trees under their iframe placeholders in the parent. Refs are assigned
741
+ * by a flat global counter so click/type/etc can resolve the right session
742
+ * without the agent having to think about frames at all.
505
743
  */
506
744
  async function ariaTree(page) {
507
- await page.session.send('Accessibility.enable');
508
- const { nodes } = await page.session.send('Accessibility.getFullAXTree');
509
- const tree = buildTree(nodes);
510
-
511
- // Build ref → backendDOMNodeId map in one pass over raw CDP nodes
745
+ const main = page.session;
746
+ await main.send('Accessibility.enable');
747
+
748
+ // 1. Linearize the frame tree depth-first: index 0 is the main frame.
749
+ const { frameTree } = await main.send('Page.getFrameTree');
750
+ const frames = [];
751
+ (function walk(node, parentId) {
752
+ frames.push({ frame: node.frame, parentId });
753
+ for (const child of node.childFrames || []) walk(child, node.frame.id);
754
+ })(frameTree, null);
755
+
756
+ // 2. For each frame, fetch its AX nodes and build a tree. refMap value is
757
+ // { session, backendNodeId } so click(ref) routes to the right CDP
758
+ // session (essential for cross-process iframes). refCounter is shared
759
+ // across all frames in one snapshot — refs stay flat integers, so the
760
+ // visible [ref=N] format and existing agent prompts don't change.
512
761
  const refMap = new Map();
513
- for (const node of nodes) {
514
- if (node.backendDOMNodeId) {
515
- refMap.set(node.nodeId, node.backendDOMNodeId);
762
+ const treesByFrameId = new Map();
763
+ const sessionByFrameId = new Map();
764
+ const refCounter = { value: 1 };
765
+ let totalNodes = 0;
766
+
767
+ for (let i = 0; i < frames.length; i++) {
768
+ const { frame } = frames[i];
769
+ const childEntry = page.framesByFrameId?.get(frame.id);
770
+ const frameSession = childEntry ? childEntry.session : main;
771
+ sessionByFrameId.set(frame.id, frameSession);
772
+
773
+ let nodes = [];
774
+ try {
775
+ if (childEntry) {
776
+ // OOPIF — use the child session, no frameId param needed.
777
+ try { await frameSession.send('Accessibility.enable'); } catch {}
778
+ const res = await frameSession.send('Accessibility.getFullAXTree');
779
+ nodes = res.nodes;
780
+ } else {
781
+ // Main frame or same-origin child — query main session, scoping by
782
+ // frameId for children (Accessibility.getFullAXTree without frameId
783
+ // would just return the top frame, dropping same-origin iframe content).
784
+ const params = i === 0 ? {} : { frameId: frame.id };
785
+ const res = await main.send('Accessibility.getFullAXTree', params);
786
+ nodes = res.nodes;
787
+ }
788
+ } catch {
789
+ // Frame may have navigated mid-snapshot — skip it rather than fail
790
+ // the whole snapshot. The placeholder iframe node will simply have
791
+ // no children in the merged tree.
792
+ continue;
793
+ }
794
+
795
+ totalNodes += nodes.length;
796
+ const tree = buildTree(nodes, frameSession, refMap, refCounter);
797
+ if (tree) treesByFrameId.set(frame.id, tree);
798
+ }
799
+
800
+ // 3. Splice each child frame's tree under its iframe placeholder node in
801
+ // the parent. DOM.getFrameOwner gives the iframe element's
802
+ // backendNodeId in the parent's view; we match it against AX nodes.
803
+ for (const { frame, parentId } of frames) {
804
+ if (parentId === null) continue;
805
+ const parentTree = treesByFrameId.get(parentId);
806
+ const childTree = treesByFrameId.get(frame.id);
807
+ if (!parentTree || !childTree) continue;
808
+ const parentSession = sessionByFrameId.get(parentId);
809
+ try {
810
+ const { backendNodeId } = await parentSession.send('DOM.getFrameOwner', { frameId: frame.id });
811
+ const placeholder = findNodeByBackend(parentTree, backendNodeId);
812
+ if (placeholder) placeholder.children = [childTree];
813
+ } catch {
814
+ // Frame owner lookup failed — leave the iframe placeholder as-is.
516
815
  }
517
816
  }
518
817
 
519
- return { tree, refMap, nodeCount: nodes.length };
818
+ const root = treesByFrameId.get(frames[0].frame.id) || null;
819
+ return { tree: root, refMap, nodeCount: totalNodes };
520
820
  }
521
821
 
522
822
  /**
523
- * Transform CDP's flat AXNode array into a nested tree.
823
+ * Transform CDP's flat AXNode array into a nested tree. Every tree node gets
824
+ * a globally unique flat ref string from `refCounter` (shared across all
825
+ * frames in one snapshot), and refMap is populated with ref → { session,
826
+ * backendNodeId } so click/type can route to the right CDP session even when
827
+ * the element lives in an iframe.
524
828
  * CDP nodes have parentId — we use that exclusively to avoid double-linking.
525
829
  */
526
- function buildTree(nodes) {
830
+ function buildTree(nodes, session, refMap, refCounter) {
527
831
  if (!nodes || nodes.length === 0) return null;
528
832
 
529
833
  const nodeMap = new Map();
530
- const linked = new Set(); // track which nodes have been linked to a parent
834
+ const linked = new Set();
531
835
 
532
- // First pass: create tree nodes
836
+ // First pass: create tree nodes + populate refMap with flat global refs
533
837
  for (const node of nodes) {
838
+ const ref = String(refCounter.value++);
534
839
  nodeMap.set(node.nodeId, {
535
- nodeId: node.nodeId,
840
+ nodeId: ref,
536
841
  backendDOMNodeId: node.backendDOMNodeId,
537
842
  role: node.role?.value || '',
538
843
  name: node.name?.value || '',
@@ -540,6 +845,9 @@ function buildTree(nodes) {
540
845
  ignored: node.ignored || false,
541
846
  children: [],
542
847
  });
848
+ if (node.backendDOMNodeId && refMap) {
849
+ refMap.set(ref, { session, backendNodeId: node.backendDOMNodeId });
850
+ }
543
851
  }
544
852
 
545
853
  // Second pass: link via parentId only (avoids duplicates from childIds)
@@ -560,6 +868,16 @@ function buildTree(nodes) {
560
868
  return root;
561
869
  }
562
870
 
871
+ function findNodeByBackend(node, backendNodeId) {
872
+ if (!node) return null;
873
+ if (node.backendDOMNodeId === backendNodeId) return node;
874
+ for (const child of node.children || []) {
875
+ const found = findNodeByBackend(child, backendNodeId);
876
+ if (found) return found;
877
+ }
878
+ return null;
879
+ }
880
+
563
881
  function extractProps(props) {
564
882
  if (!props) return {};
565
883
  const result = {};
@@ -568,79 +886,58 @@ function extractProps(props) {
568
886
  }
569
887
 
570
888
  /**
571
- * Wait until no network requests are pending for `idle` ms.
572
- * @param {object} session - Session-scoped CDP handle
573
- * @param {object} [opts]
574
- * @param {number} [opts.timeout=30000] - Max wait time
575
- * @param {number} [opts.idle=500] - Idle threshold in ms
576
- */
577
- function waitForNetworkIdle(session, opts = {}) {
578
- const timeout = opts.timeout || 30000;
579
- const idle = opts.idle || 500;
580
-
581
- return new Promise((resolve, reject) => {
582
- let pending = 0;
583
- let timer = null;
584
- const unsubs = [];
585
-
586
- const done = () => {
587
- clearTimeout(timer);
588
- clearTimeout(deadlineTimer);
589
- for (const unsub of unsubs) unsub();
590
- resolve();
591
- };
592
-
593
- const check = () => {
594
- clearTimeout(timer);
595
- if (pending <= 0) {
596
- pending = 0;
597
- timer = setTimeout(done, idle);
598
- }
599
- };
600
-
601
- unsubs.push(session.on('Network.requestWillBeSent', () => { pending++; clearTimeout(timer); }));
602
- unsubs.push(session.on('Network.loadingFinished', () => { pending--; check(); }));
603
- unsubs.push(session.on('Network.loadingFailed', () => { pending--; check(); }));
604
-
605
- const deadlineTimer = setTimeout(() => {
606
- for (const unsub of unsubs) unsub();
607
- reject(new Error(`waitForNetworkIdle timed out after ${timeout}ms`));
608
- }, timeout);
609
-
610
- // Start check immediately (might already be idle)
611
- check();
612
- });
613
- }
614
-
615
- /**
616
- * Detect if a page is a bot-challenge page (Cloudflare, etc.).
617
- * Heuristic: low ARIA node count, short text, or known challenge phrases.
889
+ * Detect if a page is a bot-challenge page (Cloudflare, hCaptcha, etc.).
890
+ *
891
+ * Pre-H9 this was over-aggressive: `nodeCount < 50` alone fired on any
892
+ * legitimate small page (404s, simple landings, error pages), and generic
893
+ * phrases like "access denied" / "unknown error" / "permission denied"
894
+ * triggered on real HTTP 4xx/5xx pages, kicking hybrid mode into a costly
895
+ * headed fallback for nothing.
896
+ *
897
+ * H9 split: STRONG_PHRASES are essentially-unambiguous challenge UI and
898
+ * fire regardless of page size; WEAK_PHRASES only fire when the page is
899
+ * ALSO tiny (so a legitimate-looking error page with "access denied" in
900
+ * its body doesn't trip the fallback).
901
+ *
618
902
  * @param {object} tree - Nested ARIA tree (from buildTree)
619
903
  * @param {number} [nodeCount] - Raw CDP node count (from Accessibility.getFullAXTree)
620
904
  */
621
/**
 * Decide whether a snapshot looks like a bot-challenge / block page
 * (Cloudflare, hCaptcha, etc.) rather than real content.
 *
 * Rules, in order:
 *  1. No tree at all -> true (nothing usable was captured).
 *  2. Any strong phrase present -> true, regardless of page size.
 *  3. Any weak phrase present AND the page is tiny -> true. Weak phrases
 *     also appear on legitimate error pages, so they need the page size as
 *     a corroborating signal.
 *  4. Otherwise -> false.
 *
 * A page is "tiny" when `nodeCount` is known and below 30, or when the
 * trimmed flattened text is shorter than 50 characters. A null or undefined
 * `nodeCount` means "unknown" and never makes the page tiny by itself.
 *
 * Matching is case-insensitive and whitespace-insensitive: every run of
 * whitespace (newlines, tabs, non-breaking spaces) is collapsed to a single
 * space first, so a phrase split across text fragments is still detected.
 *
 * @param {object} tree - Nested ARIA tree (from buildTree)
 * @param {number} [nodeCount] - Raw CDP node count (from Accessibility.getFullAXTree)
 * @returns {boolean} true when the page should be treated as a challenge page
 */
export function isChallengePage(tree, nodeCount) {
  if (!tree) return true; // truly empty AX tree: something went wrong fetching the page

  // Strong phrases: distinctive enough to identify the challenge UI on
  // their own.
  const STRONG_PHRASES = [
    'just a moment', // Cloudflare interstitial
    'checking if the site connection is secure', // Cloudflare
    'checking your browser', // Various JS challenges
    'verify you are human', // hCaptcha / reCAPTCHA
    'prove your humanity',
    'attention required', // Cloudflare block page
    'enable javascript and cookies to continue', // Cloudflare
    'please complete the security check', // Cloudflare/Akamai
  ];

  // Weak phrases: seen on real challenge pages but ALSO on legitimate small
  // error pages, so they only count on a tiny page.
  const WEAK_PHRASES = [
    'please wait',
    'request blocked',
    'access denied',
    'permission denied',
    'unknown error',
    'file a ticket',
  ];

  const text = flattenTreeText(tree);
  // Collapse whitespace so line breaks / NBSPs inside a phrase do not hide it.
  const haystack = text.toLowerCase().replace(/\s+/g, ' ');
  const containsAny = (phrases) => phrases.some((p) => haystack.includes(p));

  if (containsAny(STRONG_PHRASES)) return true;

  // `!= null` rejects both null and undefined; a bare `null < 30` would
  // coerce null to 0 and wrongly classify the page as tiny.
  const fewNodes = nodeCount != null && nodeCount < 30;
  const tinyPage = fewNodes || text.trim().length < 50;

  return tinyPage && containsAny(WEAK_PHRASES);
}
645
942
 
646
943
  function flattenTreeText(node) {