barebrowse 0.7.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/chromium.js CHANGED
@@ -6,7 +6,47 @@
6
6
  */
7
7
 
8
8
  import { execSync, spawn } from 'node:child_process';
9
- import { existsSync } from 'node:fs';
9
+ import { existsSync, rmSync } from 'node:fs';
10
+
11
+ // Track launched browsers so we can clean them up if the parent crashes.
12
+ // Registered exit handlers (one-time) iterate this set on shutdown.
13
+ const activeBrowsers = new Set();
14
+ let exitHandlersRegistered = false;
15
+
16
/**
 * Synchronously kill every tracked browser and delete the temp profile
 * directories this module created for them.
 *
 * Intended for 'exit' / signal handlers, where only synchronous APIs can be
 * relied on. All failures are swallowed on purpose: this is best-effort
 * cleanup during shutdown and must never throw.
 */
function reapAllSync() {
  const toReap = [...activeBrowsers];
  activeBrowsers.clear();

  // Send SIGKILL to everything first so the processes die in parallel.
  for (const b of toReap) {
    try { if (!b.process.killed) b.process.kill('SIGKILL'); } catch {}
  }

  // Scratch cell for a synchronous in-process sleep. Atomics.wait blocks the
  // thread for the given timeout without forking a shell (the previous
  // `execSync('sleep 0.05')` spawned a process per tick and degraded to a
  // zero-delay spin wherever no `sleep` binary exists).
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));

  // Then poll each for actual death before removing its profile dir —
  // Chromium can hold file handles briefly even after SIGKILL, which would
  // race rmSync. Cap the wait (20 × 50ms) so a stuck process can't hang
  // shutdown.
  for (const b of toReap) {
    for (let i = 0; i < 20; i++) {
      // Signal 0 only probes for existence; a throw means the pid is gone
      // (or was never valid), so stop waiting.
      try { process.kill(b.process.pid, 0); } catch { break; }
      try { Atomics.wait(sleepCell, 0, 0, 50); } catch {}
    }
    if (b.ownedProfileDir) {
      try { rmSync(b.ownedProfileDir, { recursive: true, force: true }); } catch {}
    }
  }
}
36
+
37
/**
 * Install the process-wide shutdown hooks that reap launched browsers.
 * Idempotent: the module-level flag makes every call after the first a no-op.
 */
function registerExitHandlers() {
  if (!exitHandlersRegistered) {
    exitHandlersRegistered = true;

    // 'exit' listeners run synchronously, so the reaper must be sync-only.
    process.once('exit', reapAllSync);

    ['SIGINT', 'SIGTERM', 'SIGHUP'].forEach((signalName) => {
      const reapThenReraise = () => {
        reapAllSync();
        // Send the same signal to ourselves again so the parent's exit
        // status reflects the signal that was received.
        process.kill(process.pid, signalName);
      };
      process.once(signalName, reapThenReraise);
    });
  }
}
10
50
 
11
51
  // Common Chromium binary paths by platform (Linux focus for POC)
12
52
  const CANDIDATES = [
@@ -75,6 +115,14 @@ export async function launch(opts = {}) {
75
115
  '--disable-sync',
76
116
  '--disable-translate',
77
117
  '--mute-audio',
118
+ // Force every iframe (same-origin included) into its own renderer so it
119
+ // gets a dedicated CDP session via Target.setAutoAttach. Without this,
120
+ // same-origin iframes stay in the parent process — getFullAXTree still
121
+ // works via frameId, but Input.dispatchMouseEvent on the parent session
122
+ // uses parent-viewport coords while DOM.getBoxModel for iframe-internal
123
+ // nodes returns frame-local coords, so clicks land off-target. The OOPIF
124
+ // path side-steps that: each frame has its own Input domain.
125
+ '--site-per-process',
78
126
  // Headless-only flags
79
127
  ...(!opts.headed ? ['--headless=new', '--hide-scrollbars'] : []),
80
128
  // Suppress permission prompts (location, notifications, camera, mic, etc.)
@@ -90,12 +138,14 @@ export async function launch(opts = {}) {
90
138
  args.push(`--proxy-server=${opts.proxy}`);
91
139
  }
92
140
 
141
+ // Track the temp profile dir only when we create one — caller-supplied dirs
142
+ // are the caller's to manage. ownedProfileDir gets rm'd in cleanupBrowser.
143
+ let ownedProfileDir = null;
93
144
  if (opts.userDataDir) {
94
145
  args.push(`--user-data-dir=${opts.userDataDir}`);
95
146
  } else {
96
- // Use a unique temp profile so we don't lock the user's profile
97
- // or conflict with parallel instances
98
- args.push(`--user-data-dir=/tmp/barebrowse-${process.pid}-${Date.now()}`);
147
+ ownedProfileDir = `/tmp/barebrowse-${process.pid}-${Date.now()}`;
148
+ args.push(`--user-data-dir=${ownedProfileDir}`);
99
149
  }
100
150
 
101
151
  // about:blank as initial page
@@ -138,7 +188,52 @@ export async function launch(opts = {}) {
138
188
  // Extract port from wsUrl
139
189
  const actualPort = parseInt(new URL(wsUrl).port, 10);
140
190
 
141
- return { wsUrl, process: child, port: actualPort };
191
+ const browser = { wsUrl, process: child, port: actualPort, ownedProfileDir };
192
+
193
+ // Register for parent-crash reaping. Auto-untrack on natural exit so
194
+ // a normally-exited browser doesn't leave a stale entry around.
195
+ registerExitHandlers();
196
+ activeBrowsers.add(browser);
197
+ child.once('exit', () => activeBrowsers.delete(browser));
198
+
199
+ return browser;
200
+ }
201
+
202
/**
 * Kill a launched browser and remove its temp profile dir (if we created one).
 * Waits up to 2s for the process to actually exit before unlinking the dir —
 * Chromium can still hold files briefly after SIGTERM, which races rmSync.
 * Safe to call on partially-failed launches or already-dead processes,
 * including processes that were terminated by a signal.
 * @param {object} [browser] - Handle with `process` and `ownedProfileDir`
 *   fields; a falsy value, or a handle whose `process` is null, skips the kill.
 * @returns {Promise<void>}
 */
export async function cleanupBrowser(browser) {
  if (!browser) return;
  activeBrowsers.delete(browser);

  const child = browser.process;
  // A child that died from a signal keeps exitCode === null and reports the
  // signal in signalCode instead. Without the signalCode check we would wait
  // for an 'exit' event that has already fired and burn the full timeout.
  const stillRunning =
    child && !child.killed && child.exitCode === null && child.signalCode == null;

  if (stillRunning) {
    const exited = new Promise((resolve) => {
      const onExit = () => { clearTimeout(timer); resolve(); };
      const timer = setTimeout(() => {
        // Gave up waiting: don't leave a dangling listener on the child.
        child.removeListener('exit', onExit);
        resolve();
      }, 2000);
      child.once('exit', onExit);
    });
    try { child.kill(); } catch {}
    await exited;
  }

  if (browser.ownedProfileDir) {
    // Chromium can still flush files for a while after exit, and with
    // --site-per-process every iframe is its own renderer process with its
    // own pending file handles. Retry up to 25 times at ~100ms intervals
    // (about 2.5s), with random jitter so concurrent cleanups don't all
    // retry on the same tick.
    for (let i = 0; i < 25; i++) {
      try {
        rmSync(browser.ownedProfileDir, { recursive: true, force: true });
        break;
      } catch (err) {
        // Only "directory still in use" style errors are worth retrying;
        // anything else is abandoned (best-effort cleanup).
        if (err.code !== 'ENOTEMPTY' && err.code !== 'EBUSY') break;
        await new Promise((r) => setTimeout(r, 100 + Math.floor(Math.random() * 50)));
      }
    }
  }
}
143
238
 
144
239
  /**
@@ -152,3 +247,18 @@ export async function getDebugUrl(port) {
152
247
  const data = await res.json();
153
248
  return data.webSocketDebuggerUrl;
154
249
  }
250
+
251
/**
 * Connect to a Chromium instance that is already running with
 * --remote-debugging-port=<port>, instead of launching a new one.
 *
 * The result mirrors launch()'s return shape, but `process` and
 * `ownedProfileDir` are both null, so cleanupBrowser() has nothing to kill
 * and nothing to delete — we never touch a browser or profile we don't own.
 * @param {object} opts
 * @param {number} opts.port - The debug port the running browser is listening on
 * @returns {Promise<{wsUrl: string, process: null, port: number, ownedProfileDir: null}>}
 * @throws {Error} When `port` is missing or falsy.
 */
export async function attach({ port }) {
  if (port) {
    const debuggerUrl = await getDebugUrl(port);
    return {
      wsUrl: debuggerUrl,
      process: null,
      port,
      ownedProfileDir: null,
    };
  }
  throw new Error('attach({ port }) requires a port number');
}
package/src/consent.js CHANGED
@@ -290,14 +290,9 @@ function findAcceptButton(dialogId, nodes, nodeMap, parentMap) {
290
290
  * Only matches strong patterns (not single-word fallbacks) to avoid false positives.
291
291
  */
292
292
  function tryGlobalConsentButton(nodes, session) {
293
- // Only use the specific multi-word patterns for global search
294
- const strictPatterns = ACCEPT_PATTERNS.filter((p) => {
295
- const src = p.source;
296
- return src.includes('\\s') || src.includes('\\b.*\\b.*\\b');
297
- });
298
-
299
- // Actually, let's just use all non-single-word patterns
300
- const safePatterns = ACCEPT_PATTERNS.slice(0, -3); // exclude ^accept$, ^agree$, ^ok$
293
+ // Multi-word patterns only: exclude the bare ^accept$/^agree$/^ok$ from
294
+ // ACCEPT_PATTERNS so we don't false-match unrelated buttons page-wide.
295
+ const safePatterns = ACCEPT_PATTERNS.slice(0, -3);
301
296
 
302
297
  for (const pattern of safePatterns) {
303
298
  for (const node of nodes) {
package/src/daemon.js CHANGED
@@ -39,6 +39,7 @@ export async function startDaemon(opts, outputDir, initialUrl) {
39
39
  if (opts.proxy) args.push('--proxy', opts.proxy);
40
40
  if (opts.viewport) args.push('--viewport', opts.viewport);
41
41
  if (opts.storageState) args.push('--storage-state', opts.storageState);
42
+ if (opts.downloadPath) args.push('--download-path', opts.downloadPath);
42
43
 
43
44
  const child = spawn(process.execPath, args, {
44
45
  detached: true,
@@ -77,6 +78,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
77
78
  proxy: opts.proxy,
78
79
  viewport: opts.viewport,
79
80
  storageState: opts.storageState,
81
+ downloadPath: opts.downloadPath,
80
82
  });
81
83
 
82
84
  // Console log capture
@@ -208,6 +210,17 @@ export async function runDaemon(opts, outputDir, initialUrl) {
208
210
  return { ok: true };
209
211
  },
210
212
 
213
+ async reload({ ignoreCache }) {
214
+ await page.reload({ ignoreCache: !!ignoreCache });
215
+ return { ok: true };
216
+ },
217
+
218
+ async downloads() {
219
+ // Snapshot the array — callers want a static view at the moment of
220
+ // the request, not a reference that mutates under them.
221
+ return { ok: true, value: page.downloads.map((d) => ({ ...d })) };
222
+ },
223
+
211
224
  async drag({ fromRef, toRef }) {
212
225
  await page.drag(String(fromRef), String(toRef));
213
226
  return { ok: true };