@askjo/camofox-browser 1.7.1 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/reporter.js CHANGED
@@ -3,6 +3,8 @@
3
3
  // Config passed via createReporter(config) from lib/config.js.
4
4
 
5
5
  import crypto from 'crypto';
6
+ import fs from 'fs';
7
+ import { monitorEventLoopDelay } from 'perf_hooks';
6
8
 
7
9
  // ============================================================================
8
10
  // Anonymization
@@ -294,22 +296,57 @@ export function createUrlAnonymizer() {
294
296
  // Per-tab health tracker (count-only, no content)
295
297
  // ============================================================================
296
298
 
299
+ // Known bot-detection providers, matched by response header fingerprints.
300
+ // Order: most specific first.
301
+ const BOT_DETECTION_SIGNATURES = [
302
+ { header: 'cf-mitigated', value: 'challenge', provider: 'cloudflare' },
303
+ { header: 'x-datadome', provider: 'datadome' },
304
+ { header: 'x-px', provider: 'perimeterx' },
305
+ { header: 'x-distil-cs', provider: 'distil' },
306
+ { header: 'x-sucuri-id', provider: 'sucuri' },
307
+ { header: 'server', value: 'akamaighost', provider: 'akamai' },
308
+ // cf-ray is on ALL Cloudflare responses (even 200 OK). Must be last so it
309
+ // doesn't short-circuit other providers on multi-CDN sites.
310
+ { header: 'cf-ray', provider: 'cloudflare' },
311
+ ];
312
+
313
+ /**
314
+ * Detect bot-detection provider from Playwright response headers.
315
+ * Returns { detected: bool, provider: string|null, httpStatus: number|null }
316
+ */
317
+ export function detectBotProtection(response) {
318
+ if (!response) return { detected: false, provider: null, httpStatus: null };
319
+ const status = response.status();
320
+ let headers;
321
+ try { headers = response.headers(); } catch { return { detected: false, provider: null, httpStatus: status }; }
322
+ for (const sig of BOT_DETECTION_SIGNATURES) {
323
+ const val = headers[sig.header];
324
+ if (val !== undefined) {
325
+ if (sig.value && !val.toLowerCase().includes(sig.value)) continue;
326
+ const challenged = status === 403 || status === 429 || status === 503;
327
+ return { detected: challenged, provider: sig.provider, httpStatus: status };
328
+ }
329
+ }
330
+ return { detected: false, provider: null, httpStatus: status };
331
+ }
332
+
297
333
  /**
298
334
  * Create a health tracker for a tab. Attaches to Playwright page events.
299
- * Tracks: crashes, page errors, console errors, request failures,
300
- * dialog storms, redirect depth, HTTP status histogram, frame count.
335
+ * Tracks: crashes, page errors, request failures, redirect status codes,
336
+ * HTTP status histogram (4xx+), and anti-bot challenge detection.
301
337
  * All count-based — no URLs or content stored.
302
338
  */
303
339
  export function createTabHealthTracker(page) {
304
340
  const health = {
305
341
  crashes: 0,
306
342
  pageErrors: 0,
307
- consoleErrors: 0,
308
343
  requestFailures: 0,
309
- dialogCount: 0,
344
+ inflightRequests: 0,
310
345
  maxRedirectDepth: 0,
311
- statusCounts: {}, // { 403: 5, 429: 2, ... }
312
- frameCount: 0,
346
+ redirectStatusCodes: [], // status codes in redirect chain, e.g. [301, 302, 403]
347
+ statusCounts: {}, // { 403: 5, 429: 2, ... }
348
+ botDetection: null, // { detected, provider, httpStatus } from last nav response
349
+ lastNavResponseSize: 0,
313
350
  _redirectDepth: 0,
314
351
  };
315
352
 
@@ -319,13 +356,15 @@ export function createTabHealthTracker(page) {
319
356
  // Uncaught JS exceptions on the page
320
357
  page.on('pageerror', () => { health.pageErrors++; });
321
358
 
322
- // Console errors (rate, not content)
323
- page.on('console', (msg) => {
324
- if (msg.type() === 'error') health.consoleErrors++;
359
+ // Failed requests (blocked, DNS failure, etc.) + decrement in-flight counter
360
+ page.on('requestfailed', () => {
361
+ health.requestFailures++;
362
+ health.inflightRequests = Math.max(0, health.inflightRequests - 1);
325
363
  });
326
364
 
327
- // Failed requests (blocked, DNS failure, etc.)
328
- page.on('requestfailed', () => { health.requestFailures++; });
365
+ // Track in-flight requests for hang diagnostics
366
+ page.on('request', () => { health.inflightRequests++; });
367
+ page.on('requestfinished', () => { health.inflightRequests = Math.max(0, health.inflightRequests - 1); });
329
368
 
330
369
  // HTTP status tracking (4xx/5xx only; 2xx and 3xx responses are not counted)
331
370
  page.on('response', (resp) => {
@@ -333,13 +372,12 @@ export function createTabHealthTracker(page) {
333
372
  if (s >= 400) health.statusCounts[s] = (health.statusCounts[s] || 0) + 1;
334
373
  });
335
374
 
336
- // Dialog tracking (alert/confirm/prompt storms)
375
+ // Auto-dismiss dialogs to prevent page hangs (not tracked as a metric — noise)
337
376
  page.on('dialog', async (dialog) => {
338
- health.dialogCount++;
339
377
  try { await dialog.dismiss(); } catch { /* page might be closed */ }
340
378
  });
341
379
 
342
- // Redirect depth per navigation
380
+ // Redirect depth + status code chain per navigation
343
381
  page.on('request', (req) => {
344
382
  if (req.isNavigationRequest()) {
345
383
  if (req.redirectedFrom()) {
@@ -348,19 +386,120 @@ export function createTabHealthTracker(page) {
348
386
  health.maxRedirectDepth = health._redirectDepth;
349
387
  }
350
388
  } else {
351
- health._redirectDepth = 0; // new navigation, reset
389
+ health._redirectDepth = 0;
390
+ health.redirectStatusCodes = [];
391
+ health.inflightRequests = 0; // reset on new navigation to prevent drift
352
392
  }
353
393
  }
354
394
  });
355
395
 
396
+ // Capture redirect status codes and detect bot protection on nav responses
397
+ page.on('response', (resp) => {
398
+ try {
399
+ const req = resp.request();
400
+ if (req.isNavigationRequest()) {
401
+ health.redirectStatusCodes.push(resp.status());
402
+ health.botDetection = detectBotProtection(resp);
403
+ // Approximate response body size from content-length (no body read)
404
+ const cl = resp.headers()['content-length'];
405
+ if (cl) health.lastNavResponseSize = parseInt(cl, 10) || 0;
406
+ }
407
+ } catch { /* page closed */ }
408
+ });
409
+
356
410
  /** Snapshot current health counters for inclusion in reports. */
357
411
  function snapshot() {
358
- try { health.frameCount = page.frames().length; } catch { /* closed */ }
359
412
  const { _redirectDepth, ...clean } = health;
360
413
  return { ...clean };
361
414
  }
362
415
 
363
- return { health, snapshot };
416
+ /**
417
+ * Get document.readyState from the page. Returns 'unresponsive' if evaluate() throws or does not answer within 1s.
418
+ * Use a tight timeout — if the renderer is crashed, evaluate will hang.
419
+ */
420
+ async function getReadyState() {
421
+ try {
422
+ return await Promise.race([
423
+ page.evaluate(() => document.readyState),
424
+ new Promise(resolve => setTimeout(() => resolve('unresponsive'), 1000)),
425
+ ]);
426
+ } catch {
427
+ return 'unresponsive';
428
+ }
429
+ }
430
+
431
+ return { health, snapshot, getReadyState };
432
+ }
433
+
434
+ // ============================================================================
435
+ // Process resource snapshot (memory, handles, FDs, browser RSS)
436
+ // ============================================================================
437
+
438
+ /**
439
+ * Collect process-level resource metrics. Safe to call at any time.
440
+ * Returns anonymized metrics — no PIDs, paths, or user data.
441
+ */
442
+ export function collectResourceSnapshot(opts = {}) {
443
+ const mem = process.memoryUsage();
444
+ const snap = {
445
+ nodeRssMb: Math.round(mem.rss / 1048576),
446
+ nodeHeapUsedMb: Math.round(mem.heapUsed / 1048576),
447
+ nodeHeapTotalMb: Math.round(mem.heapTotal / 1048576),
448
+ nodeExternalMb: Math.round(mem.external / 1048576),
449
+ eventLoopLagMs: null,
450
+ activeHandles: null,
451
+ activeRequests: null,
452
+ openFds: null,
453
+ browserRssMb: null,
454
+ };
455
+
456
+ // Active libuv handles/requests (private API, guarded)
457
+ try { snap.activeHandles = process._getActiveHandles().length; } catch { /* unavailable */ }
458
+ try { snap.activeRequests = process._getActiveRequests().length; } catch { /* unavailable */ }
459
+
460
+ // Open file descriptors (Linux only)
461
+ try {
462
+ if (process.platform === 'linux') {
463
+ snap.openFds = fs.readdirSync('/proc/self/fd').length;
464
+ }
465
+ } catch { /* not available or permission denied */ }
466
+
467
+ // Browser process RSS (the one people miss — browser OOMs, not Node)
468
+ if (opts.browserPid && Number.isInteger(opts.browserPid) && opts.browserPid > 0) {
469
+ try {
470
+ if (process.platform === 'linux') {
471
+ const status = fs.readFileSync(`/proc/${opts.browserPid}/status`, 'utf8');
472
+ const match = status.match(/VmRSS:\s+(\d+)\s+kB/);
473
+ if (match) snap.browserRssMb = Math.round(parseInt(match[1], 10) / 1024);
474
+ } else if (process.platform === 'darwin') {
475
+ const out = execSync(`ps -o rss= -p ${opts.browserPid}`, { timeout: 1000 }).toString().trim();
476
+ if (out) snap.browserRssMb = Math.round(parseInt(out, 10) / 1024);
477
+ }
478
+ } catch { /* process gone or permission denied */ }
479
+ }
480
+
481
+ // Session/tab counts from caller
482
+ if (opts.sessionCount != null) snap.browserContexts = opts.sessionCount;
483
+ if (opts.tabCount != null) snap.activeTabs = opts.tabCount;
484
+
485
+ return snap;
486
+ }
487
+
488
+ /**
489
+ * Classify proxy errors from Playwright navigation error messages.
490
+ * Returns { proxyError: string|null, proxyTlsError: bool } — no IPs or credentials.
491
+ */
492
+ export function classifyProxyError(errorMessage) {
493
+ if (!errorMessage || typeof errorMessage !== 'string') return { proxyError: null, proxyTlsError: false };
494
+ const msg = errorMessage.toUpperCase();
495
+ // Explicit proxy errors from Chromium/Firefox net stack
496
+ if (msg.includes('ERR_PROXY_CONNECTION_FAILED')) return { proxyError: 'ERR_PROXY_CONNECTION_FAILED', proxyTlsError: false };
497
+ if (msg.includes('ERR_TUNNEL_CONNECTION_FAILED')) return { proxyError: 'ERR_TUNNEL_CONNECTION_FAILED', proxyTlsError: false };
498
+ if (msg.includes('ERR_PROXY_AUTH_REQUESTED') || msg.includes('407')) return { proxyError: 'ERR_PROXY_AUTH_REQUESTED', proxyTlsError: false };
499
+ if (msg.includes('ERR_PROXY_CERTIFICATE_INVALID') || (msg.includes('PROXY') && msg.includes('SSL'))) return { proxyError: 'ERR_PROXY_TLS', proxyTlsError: true };
500
+ if (msg.includes('ECONNREFUSED') && msg.includes('PROXY')) return { proxyError: 'ECONNREFUSED', proxyTlsError: false };
501
+ if (msg.includes('ETIMEDOUT') && msg.includes('PROXY')) return { proxyError: 'ETIMEDOUT', proxyTlsError: false };
502
+ return { proxyError: null, proxyTlsError: false };
364
503
  }
365
504
 
366
505
  // ============================================================================
@@ -510,24 +649,95 @@ function formatIssueBody(type, detail) {
510
649
  const sections = [
511
650
  '> Auto-reported by ' + _GH_USER_AGENT + '. All data is anonymized.',
512
651
  '',
513
- `**Type:** ${type}`,
514
- `**Version:** ${detail.version || 'unknown'}`,
515
- `**Node:** ${detail.nodeVersion || 'unknown'}`,
516
- `**Platform:** ${detail.platform || 'unknown'}`,
517
- `**Uptime:** ${detail.uptimeMinutes != null ? detail.uptimeMinutes + ' min' : 'unknown'}`,
652
+ '## Environment',
653
+ `- **version:** ${detail.version || 'unknown'}`,
654
+ `- **node:** ${detail.nodeVersion || 'unknown'}`,
655
+ `- **platform:** ${detail.platform || 'unknown'}`,
656
+ `- **uptime:** ${detail.uptimeMinutes != null ? detail.uptimeMinutes + ' min' : 'unknown'}`,
518
657
  ];
519
658
 
659
+ // Resource snapshot (memory, handles, browser RSS)
660
+ const r = detail.resources;
661
+ if (r) {
662
+ sections.push('', '## Resources');
663
+ sections.push(`- **node RSS:** ${r.nodeRssMb ?? '?'} MB`);
664
+ sections.push(`- **node heap:** ${r.nodeHeapUsedMb ?? '?'} / ${r.nodeHeapTotalMb ?? '?'} MB`);
665
+ if (r.browserRssMb != null) sections.push(`- **browser RSS:** ${r.browserRssMb} MB`);
666
+ if (r.browserContexts != null) sections.push(`- **browser contexts:** ${r.browserContexts}`);
667
+ if (r.activeTabs != null) sections.push(`- **active tabs:** ${r.activeTabs}`);
668
+ if (r.openFds != null) sections.push(`- **open FDs:** ${r.openFds}`);
669
+ if (r.activeHandles != null) sections.push(`- **active handles:** ${r.activeHandles}`);
670
+ if (r.eventLoopLagMs != null) sections.push(`- **event loop lag:** ${r.eventLoopLagMs} ms`);
671
+ }
672
+
673
+ // Error info
674
+ if (detail.signal) sections.push('', `**Signal:** ${detail.signal}`);
675
+ if (detail.activeRoute) sections.push(`**Active route:** ${detail.activeRoute}`);
520
676
  if (detail.message) {
521
- sections.push('', '### Error', '```', anonymize(detail.message), '```');
677
+ sections.push('', '## Error', '```', anonymize(detail.message), '```');
522
678
  }
523
679
  if (detail.stack) {
524
- sections.push('', '### Stack Trace', '```', anonymize(detail.stack), '```');
680
+ sections.push('', '## Stack Trace', '```', anonymize(detail.stack), '```');
681
+ }
682
+
683
+ // Hang-specific details
684
+ if (detail.hang) {
685
+ const h = detail.hang;
686
+ sections.push('', '## Hang Details');
687
+ sections.push(`- **operation:** ${h.operation}`);
688
+ sections.push(`- **duration:** ${Math.round(h.durationMs / 1000)}s`);
689
+ if (h.lockQueueMs != null) sections.push(`- **lock queue wait:** ${Math.round(h.lockQueueMs)}ms`);
690
+ if (h.documentReadyState) sections.push(`- **document.readyState:** ${h.documentReadyState}`);
691
+ if (h.inflightRequests != null) sections.push(`- **in-flight requests:** ${h.inflightRequests}`);
692
+ }
693
+
694
+ // Anti-bot detection
695
+ if (detail.botDetection?.detected) {
696
+ const b = detail.botDetection;
697
+ sections.push('', '## Anti-Bot Detection');
698
+ sections.push(`- **provider:** ${b.provider || 'unknown'}`);
699
+ sections.push(`- **HTTP status:** ${b.httpStatus || '?'}`);
700
+ if (b.responseBodySizeKb != null) sections.push(`- **response size:** ${b.responseBodySizeKb} KB`);
701
+ if (b.redirectChainLength != null) sections.push(`- **redirect chain:** ${b.redirectChainLength} hops`);
702
+ if (b.redirectStatusCodes?.length) sections.push(`- **redirect statuses:** ${b.redirectStatusCodes.join(' → ')}`);
525
703
  }
526
- if (detail.context) {
527
- sections.push('', '### Context', '```', anonymize(JSON.stringify(detail.context, null, 2)), '```');
704
+
705
+ // Proxy info (safe fields only no IPs, credentials, or hostnames)
706
+ if (detail.proxy) {
707
+ const p = detail.proxy;
708
+ sections.push('', '## Proxy');
709
+ sections.push(`- **configured:** ${p.configured}`);
710
+ if (p.configured) {
711
+ if (p.type) sections.push(`- **type:** ${p.type}`);
712
+ sections.push(`- **auth configured:** ${p.authConfigured ?? 'unknown'}`);
713
+ if (p.error) sections.push(`- **error:** ${p.error}`);
714
+ if (p.tlsError) sections.push(`- **TLS error:** yes`);
715
+ }
716
+ }
717
+
718
+ // Stall-specific details
719
+ if (detail.stall) {
720
+ const s = detail.stall;
721
+ sections.push('', '## Stall Details');
722
+ sections.push(`- **stall duration:** ${Math.round(s.driftMs / 1000)}s`);
723
+ if (s.classification) sections.push(`- **classification:** ${s.classification}`);
724
+ if (s.cpuElapsedS != null) sections.push(`- **CPU time during stall:** ${s.cpuElapsedS}s`);
725
+ if (s.cpuRatio != null) sections.push(`- **CPU/wall ratio:** ${s.cpuRatio}`);
726
+ if (s.sigcontInWindow != null) sections.push(`- **SIGCONT in window:** ${s.sigcontInWindow}`);
727
+ if (s.hrtimeWallDriftS != null) sections.push(`- **hrtime↔wall drift:** ${s.hrtimeWallDriftS}s`);
728
+ if (s.eventLoopDelay) {
729
+ const eld = s.eventLoopDelay;
730
+ sections.push(`- **event loop delay:** p50=${eld.p50Ms}ms p99=${eld.p99Ms}ms max=${eld.maxMs}ms`);
731
+ }
732
+ if (s.lastRoute) sections.push(`- **last route:** ${s.lastRoute}`);
733
+ if (s.activeHandles != null) sections.push(`- **active handles:** ${s.activeHandles}`);
734
+ if (s.activeRequests != null) sections.push(`- **active requests:** ${s.activeRequests}`);
735
+ if (s.heapDeltaMb != null) sections.push(`- **heap delta:** ${s.heapDeltaMb > 0 ? '+' : ''}${s.heapDeltaMb} MB`);
528
736
  }
529
- if (detail.metrics) {
530
- sections.push('', '### Metrics', '```json', JSON.stringify(detail.metrics, null, 2), '```');
737
+
738
+ // Context (misc extra data)
739
+ if (detail.context && Object.keys(detail.context).length > 0) {
740
+ sections.push('', '<details><summary>Context</summary>', '', '```json', anonymize(JSON.stringify(detail.context, null, 2)), '```', '', '</details>');
531
741
  }
532
742
 
533
743
  return sections.join('\n');
@@ -539,6 +749,14 @@ function formatCommentBody(type, detail) {
539
749
  `**+1** — ${ts}`,
540
750
  `Version: ${detail.version || 'unknown'}, Uptime: ${detail.uptimeMinutes != null ? detail.uptimeMinutes + ' min' : '?'}`,
541
751
  ];
752
+ // Include resource snapshot in +1 comments too
753
+ const r = detail.resources;
754
+ if (r) {
755
+ const parts = [`RSS: ${r.nodeRssMb ?? '?'}MB`];
756
+ if (r.browserRssMb != null) parts.push(`Browser: ${r.browserRssMb}MB`);
757
+ if (r.activeTabs != null) parts.push(`Tabs: ${r.activeTabs}`);
758
+ lines.push(parts.join(', '));
759
+ }
542
760
  if (detail.message) {
543
761
  lines.push('```', anonymize(detail.message).slice(0, 500), '```');
544
762
  }
@@ -575,9 +793,13 @@ export function createReporter(config) {
575
793
  const version = config.version || 'unknown';
576
794
 
577
795
  let watchdogInterval = null;
796
+ let _resetNativeMemBaseline = false; // Set by resetNativeMemBaseline(), read by watchdog
578
797
  let lastTick = Date.now();
579
798
  const inFlight = new Set();
580
799
 
800
+ // Track last Express route for stall reports
801
+ let _lastRoute = null;
802
+
581
803
  // No-op when disabled
582
804
  if (!enabled) {
583
805
  return {
@@ -585,6 +807,7 @@ export function createReporter(config) {
585
807
  reportHang: async () => {},
586
808
  reportStuckLoop: async () => {},
587
809
  startWatchdog: () => {},
810
+ trackRoute: () => {},
588
811
  stop: () => {},
589
812
  _anonymize: anonymize,
590
813
  _stackSignature: stackSignature,
@@ -592,7 +815,7 @@ export function createReporter(config) {
592
815
  }
593
816
 
594
817
  /** Core: file or deduplicate a report. NEVER throws. */
595
- async function fileReport(type, label, detail) {
818
+ async function fileReport(type, labels, detail) {
596
819
  if (!rateLimiter.tryAcquire()) return;
597
820
 
598
821
  const reportPromise = (async () => {
@@ -619,7 +842,8 @@ export function createReporter(config) {
619
842
  platform: typeof process !== 'undefined' ? process.platform : 'unknown',
620
843
  });
621
844
 
622
- await createIssue(repo, title, body, [label, 'auto-report']);
845
+ const issueLabels = Array.isArray(labels) ? labels : [labels, 'auto-report'];
846
+ await createIssue(repo, title, body, issueLabels);
623
847
  } catch {
624
848
  // Swallow — reporter must never crash the server
625
849
  }
@@ -629,19 +853,32 @@ export function createReporter(config) {
629
853
  reportPromise.finally(() => inFlight.delete(reportPromise));
630
854
  }
631
855
 
856
+ /**
857
+ * Track the last Express route for stall diagnostics.
858
+ * Call from middleware: reporter.trackRoute(req.method + ' ' + req.route?.path)
859
+ */
860
+ function trackRoute(route) {
861
+ _lastRoute = route || null;
862
+ }
863
+
632
864
  async function reportCrash(error, opts = {}) {
633
865
  const err = error instanceof Error ? error : new Error(String(error));
634
866
  const uptimeMinutes = typeof process !== 'undefined'
635
867
  ? Math.round(process.uptime() / 60) : undefined;
868
+ const resources = collectResourceSnapshot(opts.resourceOpts || {});
636
869
 
637
870
  await fileReport(
638
871
  opts.signal ? `signal:${opts.signal}` : (err.name || 'crash'),
639
- 'crash',
872
+ ['crash', 'auto-report'],
640
873
  {
641
874
  error: err,
642
875
  message: err.message,
643
876
  stack: err.stack,
877
+ signal: opts.signal || null,
878
+ activeRoute: _lastRoute,
644
879
  uptimeMinutes,
880
+ resources,
881
+ proxy: opts.proxy || null,
645
882
  context: opts.context,
646
883
  },
647
884
  );
@@ -650,32 +887,55 @@ export function createReporter(config) {
650
887
  async function reportHang(operation, durationMs, opts = {}) {
651
888
  const uptimeMinutes = typeof process !== 'undefined'
652
889
  ? Math.round(process.uptime() / 60) : undefined;
890
+ const resources = collectResourceSnapshot(opts.resourceOpts || {});
653
891
 
654
- // Create per-report URL anonymizer (fresh salt each time)
655
- const urlAnon = createUrlAnonymizer();
656
- const context = { operation, durationMs, ...opts.context };
657
-
658
- // Anonymize any URLs in the journal
892
+ // Build lean context: copy the caller-supplied context; operation/durationMs are removed below
893
+ const context = { ...opts.context };
659
894
  if (context.journal) {
660
- context.journal = context.journal.map(j => {
661
- if (typeof j === 'string') return j; // already "type:action" format
662
- return j;
663
- });
895
+ context.journal = context.journal.map(j => typeof j === 'string' ? j : j);
896
+ }
897
+ // Remove fields that now have dedicated sections
898
+ delete context.operation;
899
+ delete context.durationMs;
900
+
901
+ // Anti-bot detection from health snapshot
902
+ const healthSnap = opts.healthSnapshot;
903
+ const botDetection = healthSnap?.botDetection?.detected ? {
904
+ ...healthSnap.botDetection,
905
+ responseBodySizeKb: healthSnap.lastNavResponseSize
906
+ ? Math.round(healthSnap.lastNavResponseSize / 1024) : null,
907
+ redirectChainLength: healthSnap.redirectStatusCodes?.length || null,
908
+ redirectStatusCodes: healthSnap.redirectStatusCodes?.length
909
+ ? healthSnap.redirectStatusCodes : null,
910
+ } : null;
911
+
912
+ // Get document.readyState if healthTracker provided
913
+ let documentReadyState = null;
914
+ if (opts.healthTracker?.getReadyState) {
915
+ documentReadyState = await opts.healthTracker.getReadyState();
664
916
  }
665
- // Include anonymized URL if provided
666
- if (opts.url) context.url = urlAnon.anonymizeUrl(opts.url);
667
- if (opts.redirectChain) context.redirectChain = urlAnon.anonymizeChain(opts.redirectChain);
668
917
 
669
- // Include tab health snapshot if provided
670
- if (opts.healthSnapshot) context.health = opts.healthSnapshot;
918
+ const labels = ['hang', 'auto-report'];
919
+ if (botDetection?.detected) labels.push('bot-detection');
671
920
 
672
921
  await fileReport(
673
922
  `hang:${operation}`,
674
- 'hang',
923
+ labels,
675
924
  {
676
925
  message: `Operation "${operation}" hung for ${Math.round(durationMs / 1000)}s`,
677
926
  stack: opts.error?.stack,
927
+ activeRoute: _lastRoute,
678
928
  uptimeMinutes,
929
+ resources,
930
+ hang: {
931
+ operation,
932
+ durationMs,
933
+ lockQueueMs: opts.lockQueueMs ?? null,
934
+ documentReadyState,
935
+ inflightRequests: healthSnap?.inflightRequests ?? null,
936
+ },
937
+ botDetection,
938
+ proxy: opts.proxy || null,
679
939
  context,
680
940
  },
681
941
  );
@@ -684,13 +944,15 @@ export function createReporter(config) {
684
944
  async function reportStuckLoop(durationMs, opts = {}) {
685
945
  const uptimeMinutes = typeof process !== 'undefined'
686
946
  ? Math.round(process.uptime() / 60) : undefined;
947
+ const resources = collectResourceSnapshot(opts.resourceOpts || {});
687
948
 
688
949
  await fileReport(
689
950
  'stuck:tab-lock',
690
- 'stuck',
951
+ ['stuck', 'auto-report'],
691
952
  {
692
953
  message: `Tab lock held for ${Math.round(durationMs / 1000)}s (tab destroyed)`,
693
954
  uptimeMinutes,
955
+ resources,
694
956
  context: { durationMs, ...opts.context },
695
957
  },
696
958
  );
@@ -701,21 +963,186 @@ export function createReporter(config) {
701
963
 
702
964
  const checkMs = 1000;
703
965
  lastTick = Date.now();
966
+ let lastCpuUsage = process.cpuUsage();
967
+ let lastHrtime = process.hrtime.bigint();
968
+ let lastHeapUsed = process.memoryUsage().heapUsed;
969
+
970
+ // --- Native memory leak tracking ---
971
+ // Track RSS minus JS heap over time to detect native/external memory leaks.
972
+ // Sample every 30s, alert if native memory grows by >200MB from baseline.
973
+ let nativeMemBaseline = null; // RSS - heapUsed at first measurement
974
+ let nativeMemHighWater = 0;
975
+ let lastNativeMemCheck = 0;
976
+ const NATIVE_MEM_CHECK_INTERVAL_MS = 30_000;
977
+ const NATIVE_MEM_LEAK_THRESHOLD_MB = 200; // alert if native mem exceeds baseline by this much
978
+ let nativeMemAlertFired = false;
979
+
980
+ // SIGCONT detection — macOS sends SIGCONT on wake from sleep/suspend
981
+ let lastSigcont = 0;
982
+ try { process.on('SIGCONT', () => { lastSigcont = Date.now(); }); } catch { /* unavailable */ }
983
+
984
+ // Event loop delay histogram (perf_hooks) — correlating evidence
985
+ let elHistogram = null;
986
+ try {
987
+ elHistogram = monitorEventLoopDelay({ resolution: 20 });
988
+ elHistogram.enable();
989
+ } catch { /* unavailable */ }
990
+
991
+ // Suppress false positives from OS sleep/suspend (laptop lid close, VM pause).
992
+ // Stalls > 120s are almost certainly not event-loop bugs.
993
+ const MAX_REPORTABLE_DRIFT_MS = 120_000;
994
+ let suppressTicksRemaining = 0;
995
+ const SUPPRESS_TICKS_AFTER_WAKE = 5;
704
996
 
705
997
  watchdogInterval = setInterval(() => {
706
998
  const now = Date.now();
707
999
  const drift = now - lastTick - checkMs;
1000
+ const cpuDelta = process.cpuUsage(lastCpuUsage);
1001
+ const hrtimeNow = process.hrtime.bigint();
1002
+ const hrtimeDeltaMs = Number(hrtimeNow - lastHrtime) / 1e6;
1003
+
708
1004
  lastTick = now;
1005
+ lastCpuUsage = process.cpuUsage();
1006
+ lastHrtime = hrtimeNow;
1007
+
1008
+ // After a long sleep/suspend, suppress the next few ticks (post-wake jitter)
1009
+ if (drift > MAX_REPORTABLE_DRIFT_MS) {
1010
+ suppressTicksRemaining = SUPPRESS_TICKS_AFTER_WAKE;
1011
+ lastHeapUsed = process.memoryUsage().heapUsed;
1012
+ return;
1013
+ }
1014
+ if (suppressTicksRemaining > 0) {
1015
+ suppressTicksRemaining--;
1016
+ lastHeapUsed = process.memoryUsage().heapUsed;
1017
+ return;
1018
+ }
1019
+
1020
+ // --- Native memory leak detection (runs every ~30s) ---
1021
+ if (now - lastNativeMemCheck >= NATIVE_MEM_CHECK_INTERVAL_MS) {
1022
+ lastNativeMemCheck = now;
1023
+ try {
1024
+ // Check if baseline should be reset (e.g. after browser close)
1025
+ if (_resetNativeMemBaseline) {
1026
+ nativeMemBaseline = null;
1027
+ nativeMemHighWater = 0;
1028
+ nativeMemAlertFired = false;
1029
+ _resetNativeMemBaseline = false;
1030
+ }
1031
+ const mem = process.memoryUsage();
1032
+ const nativeMemMb = Math.round((mem.rss - mem.heapUsed) / 1048576);
1033
+ if (nativeMemBaseline === null) {
1034
+ nativeMemBaseline = nativeMemMb;
1035
+ }
1036
+ nativeMemHighWater = Math.max(nativeMemHighWater, nativeMemMb);
1037
+ const growth = nativeMemMb - nativeMemBaseline;
1038
+
1039
+ if (growth > NATIVE_MEM_LEAK_THRESHOLD_MB && !nativeMemAlertFired) {
1040
+ nativeMemAlertFired = true;
1041
+ let extra = {};
1042
+ try { if (getContext) extra = getContext(); } catch { /* swallow */ }
1043
+ const resources = collectResourceSnapshot(extra.resourceOpts || {});
1044
+ delete extra.resourceOpts;
1045
+
1046
+ fileReport('leak:native-memory', ['auto-report', 'memory-leak'], {
1047
+ message: `Native memory grew by ${growth}MB (baseline: ${nativeMemBaseline}MB, current: ${nativeMemMb}MB, high-water: ${nativeMemHighWater}MB)`,
1048
+ uptimeMinutes: Math.round(process.uptime() / 60),
1049
+ resources,
1050
+ nativeMemory: {
1051
+ baselineMb: nativeMemBaseline,
1052
+ currentMb: nativeMemMb,
1053
+ highWaterMb: nativeMemHighWater,
1054
+ growthMb: growth,
1055
+ rssMb: Math.round(mem.rss / 1048576),
1056
+ heapUsedMb: Math.round(mem.heapUsed / 1048576),
1057
+ externalMb: Math.round(mem.external / 1048576),
1058
+ },
1059
+ context: extra,
1060
+ });
1061
+ }
1062
+ } catch { /* swallow */ }
1063
+ }
709
1064
 
710
1065
  if (drift > thresholdMs) {
1066
+ // CPU time consumed during the stall interval (user + system, in seconds)
1067
+ const cpuElapsedS = (cpuDelta.user + cpuDelta.system) / 1e6;
1068
+ const wallElapsedS = drift / 1000;
1069
+ const cpuRatio = wallElapsedS > 0 ? cpuElapsedS / wallElapsedS : 0;
1070
+
1071
+ // SIGCONT within the stall window = OS sleep/resume
1072
+ const sigcontInWindow = lastSigcont > 0 && (now - lastSigcont) < drift + 2000;
1073
+
1074
+ // hrtime vs wall clock drift (macOS: hrtime doesn't advance during sleep)
1075
+ const hrtimeWallDriftS = Math.abs((drift - (hrtimeDeltaMs - checkMs))) / 1000;
1076
+
1077
+ // Classify: sleep vs real stall
1078
+ let classification;
1079
+ if (cpuRatio < 0.01 && sigcontInWindow) classification = 'sleep';
1080
+ else if (cpuRatio < 0.001) classification = 'likely_sleep';
1081
+ else if (cpuRatio < 0.01) classification = 'likely_sleep';
1082
+ else if (cpuRatio > 0.1) classification = 'real_stall';
1083
+ else classification = 'ambiguous';
1084
+
1085
+ // Don't file reports for definitive sleep
1086
+ if (classification === 'sleep') {
1087
+ lastHeapUsed = process.memoryUsage().heapUsed;
1088
+ return;
1089
+ }
1090
+
1091
+ // Capture heap delta during stall (GC indicator)
1092
+ const currentHeap = process.memoryUsage().heapUsed;
1093
+ const heapDeltaMb = Math.round((currentHeap - lastHeapUsed) / 1048576);
1094
+ lastHeapUsed = currentHeap;
1095
+
711
1096
  let extra = {};
712
1097
  try { if (getContext) extra = getContext(); } catch { /* swallow */ }
713
- fileReport('stuck:event-loop', 'stuck', {
1098
+
1099
+ const resources = collectResourceSnapshot(extra.resourceOpts || {});
1100
+ // Remove resourceOpts from extra so it doesn't end up in context
1101
+ delete extra.resourceOpts;
1102
+
1103
+ // Event loop delay histogram snapshot
1104
+ let elDelay = null;
1105
+ if (elHistogram) {
1106
+ try {
1107
+ elDelay = {
1108
+ p50Ms: Math.round(elHistogram.percentile(50) / 1e6),
1109
+ p99Ms: Math.round(elHistogram.percentile(99) / 1e6),
1110
+ maxMs: Math.round(elHistogram.max / 1e6),
1111
+ };
1112
+ elHistogram.reset();
1113
+ } catch { /* unavailable */ }
1114
+ }
1115
+
1116
+ const labels = ['stuck', 'auto-report'];
1117
+ if (classification === 'likely_sleep') labels.push('likely-sleep');
1118
+
1119
+ fileReport('stuck:event-loop', labels, {
714
1120
  message: `Event loop stalled for ${Math.round(drift / 1000)}s (threshold: ${Math.round(thresholdMs / 1000)}s)`,
715
1121
  uptimeMinutes: typeof process !== 'undefined'
716
1122
  ? Math.round(process.uptime() / 60) : undefined,
717
- context: { driftMs: drift, thresholdMs, ...extra },
1123
+ resources,
1124
+ stall: {
1125
+ driftMs: drift,
1126
+ thresholdMs,
1127
+ classification,
1128
+ cpuElapsedS: Math.round(cpuElapsedS * 1000) / 1000,
1129
+ cpuRatio: Math.round(cpuRatio * 10000) / 10000,
1130
+ sigcontInWindow,
1131
+ hrtimeWallDriftS: Math.round(hrtimeWallDriftS * 100) / 100,
1132
+ eventLoopDelay: elDelay,
1133
+ lastRoute: _lastRoute,
1134
+ activeHandles: resources.activeHandles,
1135
+ activeRequests: resources.activeRequests,
1136
+ heapDeltaMb,
1137
+ nativeMemGrowthMb: nativeMemBaseline !== null
1138
+ ? Math.round((resources.nodeRssMb - resources.nodeHeapUsedMb) - nativeMemBaseline)
1139
+ : null,
1140
+ nativeMemBaselineMb: nativeMemBaseline,
1141
+ },
1142
+ context: extra,
718
1143
  });
1144
+ } else {
1145
+ lastHeapUsed = process.memoryUsage().heapUsed;
719
1146
  }
720
1147
  }, checkMs);
721
1148
 
@@ -730,12 +1157,24 @@ export function createReporter(config) {
730
1157
  return Promise.allSettled([...inFlight]);
731
1158
  }
732
1159
 
/**
 * Request a reset of the native-memory baseline.
 *
 * Meant to be called once a browser has been closed, so that the next
 * browser session is measured against a fresh baseline instead of the one
 * captured for the previous browser.
 *
 * The baseline itself lives in closure variables of startWatchdog, which
 * this function cannot reach directly; it therefore only raises the
 * `_resetNativeMemBaseline` flag for the watchdog to read.
 */
function resetNativeMemBaseline() {
  _resetNativeMemBaseline = true;
}
1169
+
733
1170
  return {
734
1171
  reportCrash,
735
1172
  reportHang,
736
1173
  reportStuckLoop,
737
1174
  startWatchdog,
1175
+ trackRoute,
738
1176
  stop,
1177
+ resetNativeMemBaseline,
739
1178
  _anonymize: anonymize,
740
1179
  _stackSignature: stackSignature,
741
1180
  _rateLimiter: rateLimiter,
@@ -1,11 +1,17 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
+ import os from 'os';
3
4
 
4
5
  const ORPHAN_PATTERNS = [
5
6
  /^\.fea5[a-f0-9]+\.so$/,
6
7
  /^\.5ef7[a-f0-9]+\.node$/,
7
8
  ];
8
9
 
10
+ // Firefox temp profile directories created by Playwright/Camoufox
11
+ const FIREFOX_PROFILE_PATTERN = /^playwright_firefoxdev_profile-/;
12
+ // Camoufox also creates these
13
+ const CAMOUFOX_TMP_PATTERN = /^camoufox[-_]/;
14
+
9
15
  export function cleanupOrphanedTempFiles({ tmpDir, minAgeMs = 5 * 60 * 1000, now = Date.now() } = {}) {
10
16
  const result = { scanned: 0, removed: 0, bytes: 0, skipped: 0 };
11
17
  if (!tmpDir) return result;
@@ -38,3 +44,65 @@ export function cleanupOrphanedTempFiles({ tmpDir, minAgeMs = 5 * 60 * 1000, now
38
44
 
39
45
  return result;
40
46
  }
47
+
/**
 * Clean up stale Firefox/Camoufox temp profile directories.
 *
 * These accumulate when browser.close() doesn't fully clean up
 * (especially with enable_cache: true). Each profile can be 10-100MB+.
 *
 * A profile is removed only when nothing inside it has been modified within
 * the last `minAgeMs`. The whole tree is inspected, not just the top-level
 * directory: a directory's own mtime changes only when its direct children
 * are added, removed or renamed, so a profile that is being written to in
 * place (or only in subdirectories) would otherwise look stale while a
 * browser is still using it.
 *
 * Symlinks are never followed and never treated as profile directories.
 *
 * @param {object} [options]
 * @param {string} [options.tmpDir] Directory to scan; defaults to os.tmpdir().
 * @param {number} [options.minAgeMs=120000] Minimum quiet period before a
 *   profile may be removed.
 * @param {number} [options.now=Date.now()] Reference time in epoch ms.
 * @returns {{scanned: number, removed: number, bytes: number, skipped: number}}
 *   `scanned` counts matching names, `removed` counts deleted directories,
 *   `bytes` is the best-effort size of what was deleted, and `skipped` counts
 *   profiles left in place (recently used, or removal/stat failed).
 */
export function cleanupStaleFirefoxProfiles({ tmpDir, minAgeMs = 2 * 60 * 1000, now = Date.now() } = {}) {
  const dir = tmpDir || os.tmpdir();
  const result = { scanned: 0, removed: 0, bytes: 0, skipped: 0 };

  let entries;
  try {
    entries = fs.readdirSync(dir);
  } catch {
    // Scan directory missing or unreadable — nothing to clean.
    return result;
  }

  // Anything modified after this instant counts as "still in use".
  const cutoffMs = now - minAgeMs;

  for (const name of entries) {
    if (!FIREFOX_PROFILE_PATTERN.test(name) && !CAMOUFOX_TMP_PATTERN.test(name)) continue;
    result.scanned++;
    const full = path.join(dir, name);
    try {
      // lstat: a symlink with a matching name must not be mistaken for a profile.
      const st = fs.lstatSync(full);
      if (!st.isDirectory()) continue;
      if (_profileTouchedSince(full, cutoffMs)) {
        result.skipped++;
        continue;
      }
      // Calculate directory size before removing
      const dirBytes = _dirSizeSync(full);
      fs.rmSync(full, { recursive: true, force: true, maxRetries: 3 });
      result.removed++;
      result.bytes += dirBytes;
    } catch {
      // Directory vanished, permission denied, or in use — leave it and
      // record that it was not removed.
      result.skipped++;
    }
  }

  return result;
}

/**
 * Report whether `rootPath` or anything beneath it was modified after
 * `cutoffMs`. Stops at the first recent entry; entries that cannot be
 * inspected are ignored. Symlinks are examined themselves, never followed.
 */
function _profileTouchedSince(rootPath, cutoffMs) {
  const pending = [rootPath];
  while (pending.length > 0) {
    const current = pending.pop();
    let st;
    try {
      st = fs.lstatSync(current);
    } catch {
      continue;
    }
    if (st.mtimeMs > cutoffMs) return true;
    if (!st.isDirectory()) continue;
    let children;
    try {
      children = fs.readdirSync(current);
    } catch {
      continue;
    }
    for (const child of children) pending.push(path.join(current, child));
  }
  return false;
}
90
+
/**
 * Recursively calculate directory size in bytes (best effort, fast).
 *
 * Sizes are taken with lstat, so a symlink contributes the size of the link
 * itself rather than the size of whatever it points at — removing the tree
 * deletes the link, not its target. Entries that vanish or cannot be read
 * are left out of the total instead of raising.
 *
 * @param {string} dirPath Directory to measure.
 * @returns {number} Total bytes found; 0 when dirPath cannot be listed.
 */
function _dirSizeSync(dirPath) {
  let total = 0;

  let entries;
  try {
    entries = fs.readdirSync(dirPath, { withFileTypes: true });
  } catch {
    // Missing or unreadable directory — nothing to add.
    return total;
  }

  for (const entry of entries) {
    const full = path.join(dirPath, entry.name);
    if (entry.isDirectory()) {
      total += _dirSizeSync(full);
      continue;
    }
    try {
      total += fs.lstatSync(full).size;
    } catch {
      // Entry vanished or is unreadable — skip it.
    }
  }

  return total;
}
@@ -2,7 +2,7 @@
2
2
  "id": "camofox-browser",
3
3
  "name": "Camofox Browser",
4
4
  "description": "Anti-detection browser automation for AI agents using Camoufox (Firefox-based)",
5
- "version": "1.7.1",
5
+ "version": "1.7.3",
6
6
  "configSchema": {
7
7
  "type": "object",
8
8
  "properties": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@askjo/camofox-browser",
3
- "version": "1.7.1",
3
+ "version": "1.7.3",
4
4
  "description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
5
5
  "type": "module",
6
6
  "main": "server.js",
@@ -62,7 +62,7 @@
62
62
  "plugin": "node scripts/plugin.js",
63
63
  "generate-openapi": "node scripts/generate-openapi.js",
64
64
  "version:sync": "node scripts/sync-version.js",
65
- "version": "node scripts/sync-version.js && git add openclaw.plugin.json",
65
+ "version": "node scripts/sync-version.js && node scripts/generate-openapi.js && git add openclaw.plugin.json openapi.json",
66
66
  "postinstall": "npx camoufox-js fetch || true"
67
67
  },
68
68
  "dependencies": {
package/server.js CHANGED
@@ -31,9 +31,9 @@ import {
31
31
  startMemoryReporter, stopMemoryReporter,
32
32
  } from './lib/metrics.js';
33
33
  import { actionFromReq, classifyError } from './lib/request-utils.js';
34
- import { cleanupOrphanedTempFiles } from './lib/tmp-cleanup.js';
34
+ import { cleanupOrphanedTempFiles, cleanupStaleFirefoxProfiles } from './lib/tmp-cleanup.js';
35
35
  import { coalesceInflight } from './lib/inflight.js';
36
- import { createReporter, createTabHealthTracker } from './lib/reporter.js';
36
+ import { createReporter, createTabHealthTracker, collectResourceSnapshot, classifyProxyError } from './lib/reporter.js';
37
37
  import { mountDocs } from './lib/openapi.js';
38
38
 
39
39
  const CONFIG = loadConfig();
@@ -42,18 +42,32 @@ const CONFIG = loadConfig();
42
42
  import { readFileSync } from 'fs';
43
43
  const _pkgVersion = (() => { try { return JSON.parse(readFileSync(new URL('./package.json', import.meta.url), 'utf8')).version; } catch { return 'unknown'; } })();
44
44
  const reporter = createReporter({ ...CONFIG, version: _pkgVersion });
/**
 * Total number of open tabs across every session.
 *
 * Each session keeps its tabs in `tabGroups`, a collection of groups that
 * each expose a `size`; the result is the sum of those sizes.
 *
 * @returns {number} Tab count; 0 when there are no sessions.
 */
function _countTabs() {
  const groupSizes = [];
  for (const { tabGroups } of sessions.values()) {
    for (const group of tabGroups.values()) {
      groupSizes.push(group.size);
    }
  }
  return groupSizes.reduce((sum, size) => sum + size, 0);
}
/**
 * PID of the current browser's OS process, if one can be determined.
 *
 * @returns {number|null} The PID, or null when there is no browser, the
 *   browser exposes no process/pid, or looking it up throws.
 */
function _browserPid() {
  try {
    const pid = browser?.process?.()?.pid;
    return pid ?? null;
  } catch {
    return null;
  }
}
/**
 * Snapshot of the server-side counters handed to the reporter as
 * `resourceOpts`: number of sessions, number of tabs, and the browser PID.
 *
 * @returns {{sessionCount: number, tabCount: number, browserPid: number|null}}
 */
function _resourceOpts() {
  const sessionCount = sessions.size;
  const tabCount = _countTabs();
  const browserPid = _browserPid();
  return { sessionCount, tabCount, browserPid };
}
// Start the event-loop watchdog (5000 passed as its first argument) and give
// it a callback that describes what the server was doing when a stall is
// reported:
//   - resourceOpts: session/tab counts and browser PID for the resource snapshot
//   - sessions:     number of sessions that currently have at least one tab
//   - summary:      per-session list of tab URLs
//
// Tabs are stored per session as tabGroups (Map<sessionKey, Map<tabId, TabState>>),
// so both levels are walked here. Iterating a non-existent `session.tabs`
// would throw, and the watchdog swallows errors from this callback, which
// silently drops the whole context (including resourceOpts) from the report.
reporter.startWatchdog(5000, () => {
  const summary = [];
  for (const [sid, session] of sessions) {
    const tabUrls = [];
    for (const group of session.tabGroups.values()) {
      for (const tab of group.values()) {
        try {
          // A tab without a usable page is still listed, as 'unknown'.
          tabUrls.push(tab.page?.url?.() || 'unknown');
        } catch {
          tabUrls.push('error');
        }
      }
    }
    if (tabUrls.length > 0) summary.push({ session: sid, tabs: tabUrls.length, urls: tabUrls });
  }
  return { resourceOpts: _resourceOpts(), sessions: summary.length, summary };
});
58
72
 
59
73
  // --- Plugin event bus ---
@@ -101,6 +115,7 @@ app.use((req, res, next) => {
101
115
  }
102
116
 
103
117
  const action = actionFromReq(req);
118
+ reporter.trackRoute(`${req.method} ${req.route?.path || '[unmatched]'}`);
104
119
  const done = requestDuration.startTimer({ action });
105
120
 
106
121
  const origEnd = res.end.bind(res);
@@ -345,6 +360,8 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
345
360
  });
346
361
 
347
362
  let browser = null;
363
+ let _lastBrowserPid = null; // Track PID independently for force-kill after close
364
+ let _browserClosePromise = null; // Shared promise for concurrent close serialization
348
365
  // userId -> { context, tabGroups: Map<sessionKey, Map<tabId, TabState>>, lastAccess }
349
366
  // TabState = { page, refs: Map<refId, {role, name, nth}>, visitedUrls: Set, downloads: Array, toolCalls: number }
350
367
  // Note: sessionKey was previously called listItemId - both are accepted for backward compatibility
@@ -531,9 +548,7 @@ function scheduleBrowserIdleShutdown() {
531
548
  browserIdleTimer = setTimeout(async () => {
532
549
  if (sessions.size === 0 && browser) {
533
550
  log('info', 'browser idle shutdown (no sessions)');
534
- const b = browser;
535
- browser = null;
536
- await b.close().catch(() => {});
551
+ await closeBrowserFully('idle_shutdown');
537
552
  }
538
553
  }, BROWSER_IDLE_TIMEOUT_MS);
539
554
  }
@@ -587,10 +602,7 @@ async function restartBrowser(reason) {
587
602
  pluginEvents.emit('browser:restart', { reason });
588
603
  try {
589
604
  await closeAllSessions(`browser_restart:${reason}`, { clearDownloads: true, clearLocks: true });
590
- if (browser) {
591
- await browser.close().catch(() => {});
592
- browser = null;
593
- }
605
+ await closeBrowserFully(`browser_restart:${reason}`);
594
606
  pluginEvents.emit('browser:closed', { reason });
595
607
  browserLaunchPromise = null;
596
608
  await ensureBrowser();
@@ -656,6 +668,167 @@ function attachBrowserCleanup(candidateBrowser, localVirtualDisplay) {
656
668
  };
657
669
  }
658
670
 
/**
 * Close the browser, including process-tree cleanup, at most once at a time.
 *
 * The actual work is done by _closeBrowserFullyImpl. While a close is in
 * flight its promise is kept in `_browserClosePromise`; any caller arriving
 * in the meantime gets that same promise instead of starting a second close.
 * The shared promise is cleared once the close settles, whether it succeeded
 * or failed.
 *
 * @param {string} reason Label describing why the browser is being closed;
 *   forwarded to the implementation. Ignored when a close is already running.
 * @returns {Promise<void>}
 */
async function closeBrowserFully(reason) {
  if (!_browserClosePromise) {
    const closing = _closeBrowserFullyImpl(reason);
    _browserClosePromise = closing;
    try {
      return await closing;
    } finally {
      _browserClosePromise = null;
    }
  }
  return _browserClosePromise;
}
689
+
/**
 * Perform one full browser shutdown. Does nothing when no browser is set.
 *
 * Steps, in order:
 *   1. Remember the browser, its tracked PID and the current FD/handle counts.
 *   2. Clear `browser` and `_lastBrowserPid` so new requests do not pick up
 *      the browser that is being torn down.
 *   3. Ask the browser to close, waiting at most 10 seconds.
 *   4. Force-kill whatever is left of the process tree (when a PID is known).
 *   5. Remove stale Firefox temp profiles (best effort).
 *   6. Ask the reporter to reset its native-memory baseline.
 *   7. Compare FD counts before/after and warn if more than 10 were gained.
 *
 * Failures in steps 3 and 5 are logged or ignored; they never abort the rest.
 *
 * @param {string} reason Label included in every log line emitted here.
 * @returns {Promise<void>}
 */
async function _closeBrowserFullyImpl(reason) {
  if (!browser) return;
  const closing = browser;

  // Capture these before the references are cleared below.
  const pid = _lastBrowserPid;
  const preCloseFds = _countOpenFds();
  const preCloseHandles = _countActiveHandles();

  browser = null;
  _lastBrowserPid = null;

  let timeoutHandle;
  try {
    const closed = closing.close();
    const timedOut = new Promise((_resolve, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('browser.close() timeout')), 10000);
    });
    await Promise.race([closed, timedOut]);
  } catch (err) {
    log('warn', 'browser.close() failed or timed out', { reason, error: err.message, pid });
  } finally {
    clearTimeout(timeoutHandle);
  }

  // Whatever close() did, make sure nothing of the old process tree survives.
  if (pid) {
    await _forceKillProcessTree(pid, reason);
  }

  try {
    const cleaned = cleanupStaleFirefoxProfiles();
    if (cleaned.removed > 0) {
      log('info', 'cleaned stale firefox profiles after browser close', cleaned);
    }
  } catch {
    // Best effort — profile cleanup must not fail the close.
  }

  // The next browser should be measured from a fresh baseline.
  reporter.resetNativeMemBaseline();

  // Taken after the force-kill so the counts reflect the finished cleanup.
  const postCloseFds = _countOpenFds();
  const postCloseHandles = _countActiveHandles();
  const fdCountsKnown = preCloseFds !== null && postCloseFds !== null;
  if (fdCountsKnown) {
    const fdDelta = postCloseFds - preCloseFds;
    // Closing should release descriptors; a clear increase suggests a leak.
    if (fdDelta > 10) {
      log('warn', 'FD leak detected after browser close', {
        reason, preCloseFds, postCloseFds, delta: fdDelta,
        preCloseHandles, postCloseHandles,
      });
    }
  }

  log('info', 'browser closed fully', {
    reason, pid, preCloseFds, postCloseFds, preCloseHandles, postCloseHandles,
  });
}
749
+
/**
 * Force-kill a browser process and whatever remains of its process tree.
 *
 * Sequence:
 *   1. SIGKILL the process `pid` itself.
 *   2. SIGKILL the process group `-pid` (errors ignored).
 *   3. Wait 200 ms, then — on Linux only — scan /proc for processes whose
 *      parent is PID 1 or this Node process and whose command line looks
 *      like Firefox/Camoufox, and SIGKILL each of them.
 *   4. Wait another 300 ms before returning.
 *
 * The scan never targets this process, `pid`, or the PID currently stored in
 * `_lastBrowserPid` (read once, just before the scan).
 *
 * NOTE(review): the orphan match is by parent PID and command line only, so
 * it appears it could also match Firefox processes that were not started by
 * this server — confirm this is acceptable for every deployment target.
 *
 * @param {number} pid PID of the browser process; values <= 1 are ignored.
 * @param {string} reason Label included in log lines.
 * @returns {Promise<void>}
 */
async function _forceKillProcessTree(pid, reason) {
  if (!pid || pid <= 1) return;

  // Positive PID: signal just that one process.
  try {
    process.kill(pid, 'SIGKILL');
    log('info', 'sent SIGKILL to browser process', { pid, reason });
  } catch (err) {
    const alreadyGone = err.code === 'ESRCH';
    if (!alreadyGone) {
      log('warn', 'failed to kill browser process', { pid, error: err.message });
    }
  }

  // Negative PID: signal the process group led by the browser, if there is one.
  try {
    process.kill(-pid, 'SIGKILL');
  } catch {
    // No such group (or not permitted) — nothing more to do here.
  }

  // Let surviving children be reparented before looking for them.
  await new Promise((resolve) => setTimeout(resolve, 200));

  if (process.platform === 'linux') {
    const selfPid = process.pid;
    // Read once so a browser launched in the meantime is excluded from the scan.
    const liveBrowserPid = _lastBrowserPid;

    // True when /proc says the process hangs off PID 1 or this process and
    // its command line carries a Firefox/Gecko marker.
    const looksLikeOrphanedBrowser = (procEntry) => {
      try {
        const status = fs.readFileSync(`/proc/${procEntry}/status`, 'utf8');
        const parentField = status.match(/PPid:\s+(\d+)/);
        const parentPid = parentField ? Number.parseInt(parentField[1], 10) : -1;
        if (parentPid !== 1 && parentPid !== selfPid) return false;
        const cmdline = fs.readFileSync(`/proc/${procEntry}/cmdline`, 'utf8');
        return /firefox-esr|firefox|camoufox|libxul\.so|GeckoChildProcess/i.test(cmdline);
      } catch {
        // Process vanished or is not readable.
        return false;
      }
    };

    try {
      const orphans = [];
      for (const procEntry of fs.readdirSync('/proc')) {
        if (!/^\d+$/.test(procEntry)) continue;
        const candidatePid = Number.parseInt(procEntry, 10);
        const isProtected = candidatePid === selfPid
          || candidatePid === pid
          || candidatePid === liveBrowserPid;
        if (isProtected) continue;
        if (looksLikeOrphanedBrowser(procEntry)) orphans.push(candidatePid);
      }

      if (orphans.length > 0) {
        log('warn', 'killing orphaned browser child processes', { orphans, reason });
        for (const orphanPid of orphans) {
          try {
            process.kill(orphanPid, 'SIGKILL');
          } catch {
            // Already dead.
          }
        }
      }
    } catch (err) {
      log('warn', 'failed to scan for orphaned browser processes', { error: err.message });
    }
  }

  // Short pause so the OS can reclaim resources before callers measure them.
  await new Promise((resolve) => setTimeout(resolve, 300));
}
820
+
/**
 * Number of file descriptors currently open in this process.
 *
 * Only available on Linux, where it is the number of entries in
 * /proc/self/fd.
 *
 * @returns {number|null} The count, or null on other platforms or when the
 *   directory cannot be read.
 */
function _countOpenFds() {
  if (process.platform !== 'linux') return null;
  try {
    return fs.readdirSync('/proc/self/fd').length;
  } catch {
    return null;
  }
}
827
+
/**
 * Number of active handles reported by process._getActiveHandles().
 *
 * @returns {number|null} The count, or null when the call is unavailable or
 *   throws.
 */
function _countActiveHandles() {
  let count = null;
  try {
    count = process._getActiveHandles().length;
  } catch {
    count = null;
  }
  return count;
}
831
+
659
832
  async function launchBrowserInstance() {
660
833
  const hostOS = getHostOS();
661
834
  const maxAttempts = proxyPool?.launchRetries ?? 1;
@@ -734,7 +907,8 @@ async function launchBrowserInstance() {
734
907
 
735
908
  virtualDisplay = localVirtualDisplay;
736
909
  browserLaunchProxy = launchProxy;
737
- browser = candidateBrowser;
910
+ _lastBrowserPid = candidateBrowser.process?.()?.pid ?? null;
911
+ browser = candidateBrowser; // publish AFTER PID is captured
738
912
  attachBrowserCleanup(browser, localVirtualDisplay);
739
913
  pluginEvents.emit('browser:launched', { browser, display: vdDisplay });
740
914
 
@@ -771,13 +945,7 @@ async function ensureBrowser() {
771
945
  deadSessions: sessions.size,
772
946
  });
773
947
  await closeAllSessions('browser_disconnected', { clearDownloads: true, clearLocks: true });
774
- // Clean up virtual display from dead browser before relaunching
775
- if (virtualDisplay) {
776
- virtualDisplay.kill();
777
- virtualDisplay = null;
778
- }
779
- browserLaunchProxy = null;
780
- browser = null;
948
+ await closeBrowserFully('browser_disconnected');
781
949
  }
782
950
  if (browser) return browser;
783
951
  if (browserLaunchPromise) return browserLaunchPromise;
@@ -1033,10 +1201,19 @@ function handleRouteError(err, req, res, extraFields = {}) {
1033
1201
  if (ts.failureJournal.length > 20) ts.failureJournal = ts.failureJournal.slice(-20);
1034
1202
 
1035
1203
  if (ts.consecutiveFailures === 3) {
1204
+ const _proxyErr = classifyProxyError(err?.message);
1036
1205
  reporter.reportHang(action, req.startTime ? Date.now() - req.startTime : 0, {
1037
1206
  error: err,
1038
- url: ts.lastRequestedUrl || undefined,
1039
1207
  healthSnapshot: ts.healthTracker ? ts.healthTracker.snapshot() : undefined,
1208
+ healthTracker: ts.healthTracker || null,
1209
+ resourceOpts: _resourceOpts(),
1210
+ proxy: proxyPool ? {
1211
+ configured: true,
1212
+ type: proxyPool.mode || null,
1213
+ authConfigured: !!CONFIG.proxy?.username,
1214
+ error: _proxyErr.proxyError,
1215
+ tlsError: _proxyErr.proxyTlsError,
1216
+ } : { configured: false },
1040
1217
  context: {
1041
1218
  failureType,
1042
1219
  consecutiveFailures: ts.consecutiveFailures,
@@ -1717,6 +1894,10 @@ app.get('/health', (req, res) => {
1717
1894
  ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1718
1895
  });
1719
1896
  }
1897
+ const mem = process.memoryUsage();
1898
+ const rssMb = Math.round(mem.rss / 1048576);
1899
+ const heapUsedMb = Math.round(mem.heapUsed / 1048576);
1900
+ const nativeMemMb = rssMb - heapUsedMb;
1720
1901
  res.json({
1721
1902
  ok: true,
1722
1903
  engine: 'camoufox',
@@ -1725,6 +1906,7 @@ app.get('/health', (req, res) => {
1725
1906
  activeTabs: getTotalTabCount(),
1726
1907
  activeSessions: sessions.size,
1727
1908
  consecutiveFailures: healthState.consecutiveNavFailures,
1909
+ memory: { rssMb, heapUsedMb, nativeMemMb },
1728
1910
  ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1729
1911
  });
1730
1912
  });
@@ -4377,11 +4559,8 @@ app.post('/stop', async (req, res) => {
4377
4559
  if (!adminKey || !timingSafeCompare(adminKey, CONFIG.adminKey)) {
4378
4560
  return res.status(403).json({ error: 'Forbidden' });
4379
4561
  }
4380
- if (browser) {
4381
- await browser.close().catch(() => {});
4382
- browser = null;
4383
- }
4384
4562
  await closeAllSessions('admin_stop', { clearDownloads: true, clearLocks: true });
4563
+ await closeBrowserFully('admin_stop');
4385
4564
  res.json({ ok: true, stopped: true, profile: 'camoufox' });
4386
4565
  } catch (err) {
4387
4566
  res.status(500).json({ ok: false, error: safeError(err) });
@@ -4928,7 +5107,7 @@ setInterval(async () => {
4928
5107
  process.on('uncaughtException', (err) => {
4929
5108
  pluginEvents.emit('browser:error', { error: err });
4930
5109
  log('error', 'uncaughtException', { error: err.message, stack: err.stack });
4931
- reporter.reportCrash(err);
5110
+ reporter.reportCrash(err, { resourceOpts: _resourceOpts() });
4932
5111
  process.exit(1);
4933
5112
  });
4934
5113
  process.on('unhandledRejection', (reason) => {
@@ -4958,7 +5137,7 @@ async function gracefulShutdown(signal) {
4958
5137
  clearLocks: false,
4959
5138
  });
4960
5139
 
4961
- if (browser) await browser.close().catch(() => {});
5140
+ await closeBrowserFully(`shutdown:${signal}`);
4962
5141
  process.exit(0);
4963
5142
  }
4964
5143
 
@@ -5018,6 +5197,20 @@ const server = app.listen(PORT, async () => {
5018
5197
  if (tmpCleanup.removed > 0) {
5019
5198
  log('info', 'cleaned up orphaned camoufox temp files', tmpCleanup);
5020
5199
  }
5200
+ const profileCleanup = cleanupStaleFirefoxProfiles();
5201
+ if (profileCleanup.removed > 0) {
5202
+ log('info', 'cleaned up stale firefox profiles on startup', profileCleanup);
5203
+ }
5204
+
5205
+ // Periodic temp profile cleanup every 10 minutes
5206
+ setInterval(() => {
5207
+ try {
5208
+ const cleaned = cleanupStaleFirefoxProfiles();
5209
+ if (cleaned.removed > 0) {
5210
+ log('info', 'periodic firefox profile cleanup', cleaned);
5211
+ }
5212
+ } catch { /* best effort */ }
5213
+ }, 10 * 60 * 1000).unref();
5021
5214
  const traceSweep = sweepOldTraces({
5022
5215
  baseDir: CONFIG.tracesDir,
5023
5216
  ttlMs: CONFIG.tracesTtlHours * 3600 * 1000,