@askjo/camofox-browser 1.4.1 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -1,11 +1,13 @@
1
1
  import { Camoufox, launchOptions } from 'camoufox-js';
2
+ import { VirtualDisplay } from 'camoufox-js/dist/virtdisplay.js';
2
3
  import { firefox } from 'playwright-core';
3
4
  import express from 'express';
4
5
  import crypto from 'crypto';
5
6
  import os from 'os';
6
7
  import { expandMacro } from './lib/macros.js';
7
8
  import { loadConfig } from './lib/config.js';
8
- import { normalizePlaywrightProxy } from './lib/proxy.js';
9
+ import { normalizePlaywrightProxy, createProxyPool, buildProxyUrl } from './lib/proxy.js';
10
+ import { createFlyHelpers } from './lib/fly.js';
9
11
  import { windowSnapshot } from './lib/snapshot.js';
10
12
  import {
11
13
  MAX_DOWNLOAD_INLINE_BYTES,
@@ -13,18 +15,25 @@ import {
13
15
  clearSessionDownloads,
14
16
  attachDownloadListener,
15
17
  getDownloadsList,
16
- extractPageImages,
17
18
  } from './lib/downloads.js';
18
- import { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
19
+ import { extractPageImages } from './lib/images.js';
20
+ import { detectYtDlp, hasYtDlp, ensureYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
19
21
  import {
20
- register as metricsRegister,
21
- requestsTotal, requestDuration, pageLoadDuration,
22
- activeTabsGauge, tabLockQueueDepth,
23
- tabLockTimeoutsTotal, startMemoryReporter, actionFromReq,
22
+ initMetrics, getRegister, isMetricsEnabled,
23
+ startMemoryReporter, stopMemoryReporter,
24
24
  } from './lib/metrics.js';
25
+ import { actionFromReq, classifyError } from './lib/request-utils.js';
25
26
 
26
27
  const CONFIG = loadConfig();
27
28
 
29
+ const {
30
+ requestsTotal, requestDuration, pageLoadDuration,
31
+ activeTabsGauge, tabLockQueueDepth,
32
+ tabLockTimeoutsTotal,
33
+ failuresTotal, browserRestartsTotal, tabsDestroyedTotal,
34
+ sessionsExpiredTotal, tabsReapedTotal, tabsRecycledTotal,
35
+ } = await initMetrics({ enabled: CONFIG.prometheusEnabled });
36
+
28
37
  // --- Structured logging ---
29
38
  function log(level, msg, fields = {}) {
30
39
  const entry = {
@@ -75,6 +84,13 @@ app.use((req, res, next) => {
75
84
  next();
76
85
  });
77
86
 
87
+ // --- Horizontal scaling (Fly.io multi-machine) ---
88
+ const fly = createFlyHelpers(CONFIG);
89
+ const FLY_MACHINE_ID = fly.machineId;
90
+
91
+ // Route tab requests to the owning machine via fly-replay header.
92
+ app.use('/tabs/:tabId', fly.replayMiddleware(log));
93
+
78
94
  const ALLOWED_URL_SCHEMES = ['http:', 'https:'];
79
95
 
80
96
  // Interactive roles to include - exclude combobox to avoid opening complex widgets
@@ -224,6 +240,7 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
224
240
  log('info', 'cookies imported', { reqId: req.reqId, userId: String(userId), count: sanitized.length });
225
241
  res.json(result);
226
242
  } catch (err) {
243
+ failuresTotal.labels(classifyError(err), 'set_cookies').inc();
227
244
  log('error', 'cookie import failed', { reqId: req.reqId, error: err.message });
228
245
  res.status(500).json({ error: safeError(err) });
229
246
  }
@@ -250,6 +267,8 @@ const FAILURE_THRESHOLD = 3;
250
267
  const MAX_CONSECUTIVE_TIMEOUTS = 3;
251
268
  const TAB_LOCK_TIMEOUT_MS = 35000; // Must be > HANDLER_TIMEOUT_MS so active op times out first
252
269
 
270
+
271
+
253
272
  // Proper mutex for tab serialization. The old Promise-chain lock on timeout proceeded
254
273
  // WITHOUT the lock, allowing concurrent Playwright operations that corrupt CDP state.
255
274
  class TabLock {
@@ -329,6 +348,10 @@ function withTimeout(promise, ms, label) {
329
348
  ]);
330
349
  }
331
350
 
/**
 * Resolve the timeout budget for a request handler.
 *
 * When the proxy pool reports `canRotateSessions`, the budget is raised to at
 * least 180000 ms; otherwise the base value is returned unchanged.
 *
 * @param {number} [baseMs=HANDLER_TIMEOUT_MS] - Baseline timeout in milliseconds.
 * @returns {number} Timeout in milliseconds.
 */
function requestTimeoutMs(baseMs = HANDLER_TIMEOUT_MS) {
  if (!proxyPool?.canRotateSessions) {
    return baseMs;
  }
  return Math.max(baseMs, 180000);
}
354
+
332
355
  const userConcurrency = new Map();
333
356
 
334
357
  async function withUserLimit(userId, operation) {
@@ -382,25 +405,27 @@ function getHostOS() {
382
405
  return 'linux';
383
406
  }
384
407
 
385
- function buildProxyConfig() {
386
- const { host, port, username, password } = CONFIG.proxy;
387
-
388
- if (!host || !port) {
389
- log('info', 'no proxy configured');
390
- return null;
391
- }
392
-
393
- log('info', 'proxy configured', { host, port });
394
- return {
395
- server: `http://${host}:${port}`,
396
- username,
397
- password,
398
- };
408
+ // Proxy strategy for outbound browsing.
409
+ const proxyPool = createProxyPool(CONFIG.proxy);
410
+
411
+ if (proxyPool) {
412
+ log('info', 'proxy pool created', {
413
+ mode: proxyPool.mode,
414
+ host: proxyPool.canRotateSessions ? CONFIG.proxy.backconnectHost : CONFIG.proxy.host,
415
+ ports: proxyPool.canRotateSessions ? [CONFIG.proxy.backconnectPort] : CONFIG.proxy.ports,
416
+ poolSize: proxyPool.size,
417
+ country: CONFIG.proxy.country || null,
418
+ state: CONFIG.proxy.state || null,
419
+ city: CONFIG.proxy.city || null,
420
+ });
421
+ } else {
422
+ log('info', 'no proxy configured');
399
423
  }
400
424
 
401
425
  const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
402
426
  let browserIdleTimer = null;
403
427
  let browserLaunchPromise = null;
428
+ let browserWarmRetryTimer = null;
404
429
 
405
430
  function scheduleBrowserIdleShutdown() {
406
431
  clearBrowserIdleTimer();
@@ -423,6 +448,21 @@ function clearBrowserIdleTimer() {
423
448
  }
424
449
  }
425
450
 
/**
 * Schedule a background attempt to (re)launch the browser.
 *
 * Does nothing when a retry is already pending, a browser instance exists, or
 * a launch is in flight. When the attempt fails, another retry is scheduled
 * with the delay doubled, capped at 30000 ms.
 *
 * @param {number} [delayMs=5000] - Delay before this attempt, in milliseconds.
 */
function scheduleBrowserWarmRetry(delayMs = 5000) {
  if (browserWarmRetryTimer || browser || browserLaunchPromise) return;
  browserWarmRetryTimer = setTimeout(async () => {
    // Clear the handle first so the guard above lets the failure path reschedule.
    browserWarmRetryTimer = null;
    const start = Date.now();
    try {
      await ensureBrowser();
      log('info', 'background browser warm retry succeeded', { ms: Date.now() - start });
    } catch (err) {
      // Log the delay that will actually be used for the next attempt
      // (previously the current delay was reported under `nextDelayMs`).
      const nextDelayMs = Math.min(delayMs * 2, 30000);
      log('warn', 'background browser warm retry failed', { error: err.message, nextDelayMs });
      scheduleBrowserWarmRetry(nextDelayMs);
    }
  }, delayMs);
}
465
+
426
466
  // --- Browser health tracking ---
427
467
  const healthState = {
428
468
  consecutiveNavFailures: 0,
@@ -444,6 +484,7 @@ function recordNavFailure() {
444
484
  async function restartBrowser(reason) {
445
485
  if (healthState.isRecovering) return;
446
486
  healthState.isRecovering = true;
487
+ browserRestartsTotal.labels(reason).inc();
447
488
  log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });
448
489
  try {
449
490
  for (const [, session] of sessions) {
@@ -476,30 +517,157 @@ function getTotalTabCount() {
476
517
  return total;
477
518
  }
478
519
 
520
+ // Virtual display for WebGL support and anti-detection.
521
+ // Xvfb gives Firefox a real X display with GLX, enabling software-rendered WebGL
522
+ // via Mesa llvmpipe. Without this, WebGL returns "no context" — a massive bot signal.
523
+ let virtualDisplay = null;
524
+ let browserLaunchProxy = null;
525
+
/**
 * Smoke-test a freshly launched browser against Google Search.
 *
 * Opens a throwaway context, visits the Google home page and then a fixed
 * search URL, and reports whether the result looks like a normal results page.
 * The throwaway context is always closed, even when navigation throws.
 *
 * @param {object} candidateBrowser - Browser exposing `newContext()`.
 * @returns {Promise<{ok: boolean, url: string, blocked: boolean}>} Probe
 *   outcome: `blocked` is the block-page verdict, `url` the final page URL.
 */
async function probeGoogleSearch(candidateBrowser) {
  let probeContext = null;
  try {
    probeContext = await candidateBrowser.newContext({
      viewport: { width: 1280, height: 720 },
      permissions: ['geolocation'],
    });
    const probePage = await probeContext.newPage();

    // Land on the home page first, pause, then issue the search.
    await probePage.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 });
    await probePage.waitForTimeout(1200);
    await probePage.goto('https://www.google.com/search?q=weather%20today', { waitUntil: 'domcontentloaded', timeout: 30000 });
    await probePage.waitForTimeout(4000);

    const blocked = await isGoogleSearchBlocked(probePage);
    const ok = !blocked && isGoogleSerp(probePage.url());
    return { ok, url: probePage.url(), blocked };
  } finally {
    await probeContext?.close().catch(() => {});
  }
}
549
+
/**
 * Wrap `candidateBrowser.close()` so that closing the browser also releases
 * the resources that were created for it.
 *
 * After the original `close()` settles — whether it resolves or rejects — the
 * recorded launch proxy is cleared and the virtual display that was started
 * for this browser (if any) is killed. Previously a rejected `close()` skipped
 * the cleanup, leaving the display process running and the module state stale.
 *
 * @param {object} candidateBrowser - Browser whose `close` method is replaced.
 * @param {object|null} localVirtualDisplay - Display created for this browser,
 *   exposing `kill()`, or a falsy value when none was started.
 */
function attachBrowserCleanup(candidateBrowser, localVirtualDisplay) {
  const origClose = candidateBrowser.close.bind(candidateBrowser);
  candidateBrowser.close = async (...args) => {
    try {
      return await origClose(...args);
    } finally {
      browserLaunchProxy = null;
      if (localVirtualDisplay) {
        // Drop the module-level reference before killing so it is cleared
        // even if kill() itself throws.
        if (virtualDisplay === localVirtualDisplay) virtualDisplay = null;
        localVirtualDisplay.kill();
      }
    }
  };
}
561
+
479
562
  async function launchBrowserInstance() {
480
563
  const hostOS = getHostOS();
481
- const proxy = buildProxyConfig();
482
-
483
- log('info', 'launching camoufox', { hostOS, geoip: !!proxy });
484
-
485
- const options = await launchOptions({
486
- headless: true,
487
- os: hostOS,
488
- humanize: true,
489
- enable_cache: true,
490
- proxy: proxy,
491
- geoip: !!proxy,
492
- });
493
- options.proxy = normalizePlaywrightProxy(options.proxy);
494
-
495
- browser = await firefox.launch(options);
496
- log('info', 'camoufox launched');
497
- return browser;
564
+ const maxAttempts = proxyPool?.launchRetries ?? 1;
565
+ let lastError = null;
566
+
567
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
568
+ const launchProxy = proxyPool
569
+ ? proxyPool.getLaunchProxy(proxyPool.canRotateSessions ? `browser-${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}` : undefined)
570
+ : null;
571
+
572
+ let localVirtualDisplay = null;
573
+ let vdDisplay = undefined;
574
+ let candidateBrowser = null;
575
+
576
+ try {
577
+ if (os.platform() === 'linux') {
578
+ localVirtualDisplay = new VirtualDisplay();
579
+ vdDisplay = localVirtualDisplay.get();
580
+ log('info', 'xvfb virtual display started', { display: vdDisplay, attempt });
581
+ }
582
+ } catch (err) {
583
+ log('warn', 'xvfb not available, falling back to headless', { error: err.message, attempt });
584
+ localVirtualDisplay = null;
585
+ }
586
+
587
+ const useVirtualDisplay = !!vdDisplay;
588
+ log('info', 'launching camoufox', {
589
+ hostOS,
590
+ attempt,
591
+ maxAttempts,
592
+ geoip: !!launchProxy,
593
+ proxyMode: proxyPool?.mode || null,
594
+ proxyServer: launchProxy?.server || null,
595
+ proxySession: launchProxy?.sessionId || null,
596
+ proxyPoolSize: proxyPool?.size || 0,
597
+ virtualDisplay: useVirtualDisplay,
598
+ });
599
+
600
+ try {
601
+ const options = await launchOptions({
602
+ headless: useVirtualDisplay ? false : true,
603
+ os: hostOS,
604
+ humanize: true,
605
+ enable_cache: true,
606
+ proxy: launchProxy,
607
+ geoip: !!launchProxy,
608
+ virtual_display: vdDisplay,
609
+ });
610
+ options.proxy = normalizePlaywrightProxy(options.proxy);
611
+
612
+ candidateBrowser = await firefox.launch(options);
613
+
614
+ if (proxyPool?.canRotateSessions) {
615
+ const probe = await probeGoogleSearch(candidateBrowser);
616
+ if (!probe.ok) {
617
+ log('warn', 'browser launch google probe failed', {
618
+ attempt,
619
+ maxAttempts,
620
+ proxySession: launchProxy?.sessionId || null,
621
+ url: probe.url,
622
+ });
623
+ if (attempt < maxAttempts) {
624
+ await candidateBrowser.close().catch(() => {});
625
+ if (localVirtualDisplay) localVirtualDisplay.kill();
626
+ continue;
627
+ }
628
+ // Last attempt: accept browser in degraded mode rather than death-spiraling.
629
+ // Non-Google sites will still work; Google requests will get blocked responses.
630
+ log('error', 'all proxy sessions Google-blocked, accepting browser in degraded mode', {
631
+ maxAttempts,
632
+ proxySession: launchProxy?.sessionId || null,
633
+ });
634
+ }
635
+ }
636
+
637
+ virtualDisplay = localVirtualDisplay;
638
+ browserLaunchProxy = launchProxy;
639
+ browser = candidateBrowser;
640
+ attachBrowserCleanup(browser, localVirtualDisplay);
641
+
642
+ log('info', 'camoufox launched', {
643
+ attempt,
644
+ maxAttempts,
645
+ virtualDisplay: useVirtualDisplay,
646
+ proxyMode: proxyPool?.mode || null,
647
+ proxyServer: launchProxy?.server || null,
648
+ proxySession: launchProxy?.sessionId || null,
649
+ });
650
+ return browser;
651
+ } catch (err) {
652
+ lastError = err;
653
+ log('warn', 'camoufox launch attempt failed', {
654
+ attempt,
655
+ maxAttempts,
656
+ error: err.message,
657
+ proxySession: launchProxy?.sessionId || null,
658
+ });
659
+ await candidateBrowser?.close().catch(() => {});
660
+ if (localVirtualDisplay) localVirtualDisplay.kill();
661
+ }
662
+ }
663
+
664
+ throw lastError || new Error('Failed to launch a usable browser');
498
665
  }
499
666
 
500
667
  async function ensureBrowser() {
501
668
  clearBrowserIdleTimer();
502
669
  if (browser && !browser.isConnected()) {
670
+ failuresTotal.labels('browser_disconnected', 'internal').inc();
503
671
  log('warn', 'browser disconnected, clearing dead sessions and relaunching', {
504
672
  deadSessions: sessions.size,
505
673
  });
@@ -507,13 +675,20 @@ async function ensureBrowser() {
507
675
  await session.context.close().catch(() => {});
508
676
  }
509
677
  sessions.clear();
678
+ // Clean up virtual display from dead browser before relaunching
679
+ if (virtualDisplay) {
680
+ virtualDisplay.kill();
681
+ virtualDisplay = null;
682
+ }
683
+ browserLaunchProxy = null;
510
684
  browser = null;
511
685
  }
512
686
  if (browser) return browser;
513
687
  if (browserLaunchPromise) return browserLaunchPromise;
688
+ const launchTimeoutMs = proxyPool?.launchTimeoutMs ?? 60000;
514
689
  browserLaunchPromise = Promise.race([
515
690
  launchBrowserInstance(),
516
- new Promise((_, reject) => setTimeout(() => reject(new Error('Browser launch timeout (30s)')), 30000)),
691
+ new Promise((_, reject) => setTimeout(() => reject(new Error(`Browser launch timeout (${Math.round(launchTimeoutMs / 1000)}s)`)), launchTimeoutMs)),
517
692
  ]).finally(() => { browserLaunchPromise = null; });
518
693
  return browserLaunchPromise;
519
694
  }
@@ -556,11 +731,26 @@ async function getSession(userId) {
556
731
  contextOptions.timezoneId = 'America/Los_Angeles';
557
732
  contextOptions.geolocation = { latitude: 37.7749, longitude: -122.4194 };
558
733
  }
734
+ let sessionProxy = null;
735
+ if (proxyPool?.canRotateSessions) {
736
+ sessionProxy = proxyPool.getNext(`ctx-${key}-${crypto.randomUUID().replace(/-/g, '').slice(0, 8)}`);
737
+ contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
738
+ log('info', 'session proxy assigned', { userId: key, sessionId: sessionProxy.sessionId });
739
+ } else if (proxyPool) {
740
+ sessionProxy = proxyPool.getNext();
741
+ contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
742
+ log('info', 'session proxy assigned', { userId: key, proxy: sessionProxy.server });
743
+ }
559
744
  const context = await b.newContext(contextOptions);
560
745
 
561
- session = { context, tabGroups: new Map(), lastAccess: Date.now() };
746
+ session = { context, tabGroups: new Map(), lastAccess: Date.now(), proxySessionId: sessionProxy?.sessionId || null };
562
747
  sessions.set(key, session);
563
- log('info', 'session created', { userId: key });
748
+ log('info', 'session created', {
749
+ userId: key,
750
+ proxyMode: proxyPool?.mode || null,
751
+ proxyServer: sessionProxy?.server || browserLaunchProxy?.server || null,
752
+ proxySession: sessionProxy?.sessionId || browserLaunchProxy?.sessionId || null,
753
+ });
564
754
  }
565
755
  session.lastAccess = Date.now();
566
756
  return session;
@@ -599,11 +789,30 @@ function isTabDestroyedError(err) {
599
789
 
600
790
  // Centralized error handler for route catch blocks.
601
791
  // Auto-destroys dead browser sessions and returns appropriate status codes.
/**
 * Report whether an error looks like a proxy-level connection failure.
 *
 * Matches messages containing `NS_ERROR_PROXY`, or the phrase
 * "proxy connection" in any letter case. Accepts Error-like objects as well as
 * thrown strings, and never throws itself: a non-string `message` is coerced
 * instead of raising a TypeError from inside the error handler.
 *
 * @param {unknown} err - The caught value.
 * @returns {boolean} True when the message carries a proxy-failure marker.
 */
function isProxyError(err) {
  if (!err) return false;
  const raw = typeof err === 'string' ? err : err.message;
  const msg = typeof raw === 'string' ? raw : String(raw ?? '');
  return msg.includes('NS_ERROR_PROXY') || /proxy connection/i.test(msg);
}
797
+
602
798
  function handleRouteError(err, req, res, extraFields = {}) {
799
+ const failureType = classifyError(err);
800
+ const action = actionFromReq(req);
801
+ failuresTotal.labels(failureType, action).inc();
802
+
603
803
  const userId = req.body?.userId || req.query?.userId;
604
804
  if (userId && isDeadContextError(err)) {
605
805
  destroySession(userId);
606
806
  }
807
+ // Proxy errors mean the session is dead — rotate at context level.
808
+ // Destroy the user's session so the next request gets a fresh context with a new proxy.
809
+ if (isProxyError(err) && proxyPool?.canRotateSessions && userId) {
810
+ log('warn', 'proxy error detected, destroying user session for fresh proxy on next request', {
811
+ action, userId, error: err.message,
812
+ });
813
+ browserRestartsTotal.labels('proxy_error').inc();
814
+ destroySession(userId);
815
+ }
607
816
  // Track consecutive timeouts per tab and auto-destroy stuck tabs
608
817
  if (userId && isTimeoutError(err)) {
609
818
  const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
@@ -614,7 +823,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
614
823
  found.tabState.consecutiveTimeouts++;
615
824
  if (found.tabState.consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
616
825
  log('warn', 'auto-destroying tab after consecutive timeouts', { tabId, count: found.tabState.consecutiveTimeouts });
617
- destroyTab(session, tabId);
826
+ destroyTab(session, tabId, 'consecutive_timeouts');
618
827
  }
619
828
  }
620
829
  }
@@ -624,7 +833,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
624
833
  const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
625
834
  const session = sessions.get(normalizeUserId(userId));
626
835
  if (session && tabId) {
627
- destroyTab(session, tabId);
836
+ destroyTab(session, tabId, 'lock_queue');
628
837
  }
629
838
  return res.status(503).json({ error: 'Tab unresponsive and has been destroyed. Open a new tab.', ...extraFields });
630
839
  }
@@ -635,7 +844,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
635
844
  sendError(res, err, extraFields);
636
845
  }
637
846
 
638
- function destroyTab(session, tabId) {
847
+ function destroyTab(session, tabId, reason) {
639
848
  const lock = tabLocks.get(tabId);
640
849
  if (lock) {
641
850
  lock.drain();
@@ -645,17 +854,51 @@ function destroyTab(session, tabId) {
645
854
  for (const [listItemId, group] of session.tabGroups) {
646
855
  if (group.has(tabId)) {
647
856
  const tabState = group.get(tabId);
648
- log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls });
857
+ log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls, reason: reason || 'unknown' });
649
858
  safePageClose(tabState.page);
650
859
  group.delete(tabId);
651
860
  if (group.size === 0) session.tabGroups.delete(listItemId);
652
861
  refreshActiveTabsGauge();
862
+ if (reason) tabsDestroyedTotal.labels(reason).inc();
653
863
  return true;
654
864
  }
655
865
  }
656
866
  return false;
657
867
  }
658
868
 
/**
 * Recycle one tab in a session to free a slot.
 *
 * The tab chosen is the one with the fewest `toolCalls` across all of the
 * session's tab groups (the first one encountered wins ties). The tab is
 * removed from its group and its lock is drained BEFORE the page close is
 * awaited, so a concurrent request cannot pick up a tab whose page is being
 * closed. Both the lock-queue and active-tab gauges are refreshed afterwards.
 *
 * @param {object} session - Session whose `tabGroups` is a Map of groupKey ->
 *   Map of tabId -> tab state.
 * @param {string} reqId - Request id, used only for logging.
 * @returns {Promise<{recycledTabId: string, recycledFromGroup: string}|null>}
 *   The recycled tab's identifiers, or null if the session has no tabs.
 */
async function recycleOldestTab(session, reqId) {
  let victim = null;
  for (const [groupKey, group] of session.tabGroups) {
    for (const [tabId, tabState] of group) {
      if (!victim || tabState.toolCalls < victim.tabState.toolCalls) {
        victim = { groupKey, group, tabId, tabState };
      }
    }
  }
  if (!victim) return null;

  const { groupKey, group, tabId, tabState } = victim;

  // Unregister synchronously so the tab is unreachable while its page closes.
  group.delete(tabId);
  if (group.size === 0) session.tabGroups.delete(groupKey);
  const lock = tabLocks.get(tabId);
  if (lock) {
    lock.drain();
    tabLocks.delete(tabId);
  }

  await safePageClose(tabState.page);

  refreshTabLockQueueDepth();
  // Keep the active-tab gauge in step, as destroyTab() does.
  refreshActiveTabsGauge();
  tabsRecycledTotal.inc();
  log('info', 'tab recycled (limit reached)', { reqId, recycledTabId: tabId, recycledFromGroup: groupKey });
  return { recycledTabId: tabId, recycledFromGroup: groupKey };
}
901
+
659
902
  function destroySession(userId) {
660
903
  const key = normalizeUserId(userId);
661
904
  const session = sessions.get(key);
@@ -684,9 +927,57 @@ function createTabState(page) {
684
927
  toolCalls: 0,
685
928
  consecutiveTimeouts: 0,
686
929
  lastSnapshot: null,
930
+ lastRequestedUrl: null,
931
+ googleRetryCount: 0,
687
932
  };
688
933
  }
689
934
 
/**
 * Check whether the page is showing a connection-failure screen.
 *
 * Reads up to the first 600 characters of the page's body text and looks for
 * known connection/proxy error phrases. A missing or closed page, or a failed
 * evaluation, yields `false`.
 *
 * @param {object} page - Page exposing `isClosed()` and `evaluate()`.
 * @returns {Promise<boolean>} True when an error phrase is present.
 */
async function isGoogleUnavailable(page) {
  const unusable = !page || page.isClosed();
  if (unusable) {
    return false;
  }

  const failurePhrases = /Unable to connect|502 Bad Gateway or Proxy Error|Camoufox can’t establish a connection/;
  const leadingText = await page
    .evaluate(() => document.body?.innerText?.slice(0, 600) || '')
    .catch(() => '');
  return failurePhrases.test(leadingText);
}
940
+
/**
 * Replay a Google search for a tab on a brand-new browser context.
 *
 * Only acts when the previous tab state's last requested URL is a Google
 * search URL and fewer than 3 retries have been made; otherwise returns null.
 * The user's current session context is closed and removed, a new session is
 * obtained via getSession(), and a new page is registered under the same
 * `tabId`. The page then visits the Google home page followed by the
 * original search URL.
 *
 * @param {string} userId - Owner of the session being rotated.
 * @param {string} sessionKey - Tab-group key passed to getTabGroup().
 * @param {string} tabId - Tab id to re-register on the new context.
 * @param {object} previousTabState - State of the tab being replaced; its
 *   `lastRequestedUrl` and `googleRetryCount` are carried over.
 * @param {string} reason - Label value recorded on the rotation counter.
 * @param {string} reqId - Request id, used only for logging.
 * @returns {Promise<{session: object, tabState: object}|null>} The new session
 *   and tab state, or null when no rotation was attempted.
 */
async function rotateGoogleTab(userId, sessionKey, tabId, previousTabState, reason, reqId) {
  if (!previousTabState?.lastRequestedUrl) return null;
  if (!isGoogleSearchUrl(previousTabState.lastRequestedUrl)) return null;
  if ((previousTabState.googleRetryCount || 0) >= 3) return null;

  // Counts rotation events; this is not a full browser restart.
  browserRestartsTotal.labels(reason).inc();

  // Rotation happens per context: only this user's context is torn down, so
  // other users' sessions and tabs keep running.
  const normalizedUser = normalizeUserId(userId);
  const staleSession = sessions.get(normalizedUser);
  if (staleSession) {
    await staleSession.context.close().catch(() => {});
    sessions.delete(normalizedUser);
  }

  const freshSession = await getSession(userId);
  const targetGroup = getTabGroup(freshSession, sessionKey);
  const freshPage = await freshSession.context.newPage();

  const replayState = createTabState(freshPage);
  replayState.googleRetryCount = (previousTabState.googleRetryCount || 0) + 1;
  replayState.lastRequestedUrl = previousTabState.lastRequestedUrl;
  attachDownloadListener(replayState, tabId, log);
  targetGroup.set(tabId, replayState);
  refreshActiveTabsGauge();

  log('warn', 'replaying google search on fresh context (per-context proxy rotation)', {
    reqId,
    tabId,
    retryCount: replayState.googleRetryCount,
    url: replayState.lastRequestedUrl,
    proxySession: freshSession.proxySessionId || null,
  });

  // Visit the home page first, pause briefly, then replay the search itself.
  await withPageLoadDuration('navigate', () =>
    freshPage.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }));
  replayState.visitedUrls.add('https://www.google.com/');
  await freshPage.waitForTimeout(1200);
  await withPageLoadDuration('navigate', () =>
    freshPage.goto(replayState.lastRequestedUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
  replayState.visitedUrls.add(replayState.lastRequestedUrl);

  return { session: freshSession, tabState: replayState };
}
980
+
690
981
  function refreshActiveTabsGauge() {
691
982
  activeTabsGauge.set(getTotalTabCount());
692
983
  }
@@ -711,7 +1002,14 @@ async function withPageLoadDuration(action, fn) {
711
1002
 
712
1003
 
713
1004
  async function waitForPageReady(page, options = {}) {
714
- const { timeout = 10000, waitForNetwork = true } = options;
1005
+ const {
1006
+ timeout = 10000,
1007
+ waitForNetwork = true,
1008
+ waitForHydration = true,
1009
+ settleMs = 200,
1010
+ hydrationPollMs = 250,
1011
+ hydrationTimeoutMs = Math.min(timeout, 10000),
1012
+ } = options;
715
1013
 
716
1014
  try {
717
1015
  await page.waitForLoadState('domcontentloaded', { timeout });
@@ -722,27 +1020,28 @@ async function waitForPageReady(page, options = {}) {
722
1020
  });
723
1021
  }
724
1022
 
725
- // Framework hydration wait (React/Next.js/Vue) - mirrors Swift WebView.swift logic
726
- // Wait for readyState === 'complete' + network quiet (40 iterations × 250ms max)
727
- await page.evaluate(async () => {
728
- for (let i = 0; i < 40; i++) {
729
- // Check if network is quiet (no recent resource loads)
730
- const entries = performance.getEntriesByType('resource');
731
- const recentEntries = entries.slice(-5);
732
- const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
733
-
734
- if (document.readyState === 'complete' && netQuiet) {
735
- // Double RAF to ensure paint is complete
736
- await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
737
- break;
1023
+ if (waitForHydration) {
1024
+ const maxIterations = Math.max(1, Math.floor(hydrationTimeoutMs / hydrationPollMs));
1025
+ await page.evaluate(async ({ maxIterations, hydrationPollMs }) => {
1026
+ for (let i = 0; i < maxIterations; i++) {
1027
+ const entries = performance.getEntriesByType('resource');
1028
+ const recentEntries = entries.slice(-5);
1029
+ const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
1030
+
1031
+ if (document.readyState === 'complete' && netQuiet) {
1032
+ await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
1033
+ break;
1034
+ }
1035
+ await new Promise(r => setTimeout(r, hydrationPollMs));
738
1036
  }
739
- await new Promise(r => setTimeout(r, 250));
740
- }
741
- }).catch(() => {
742
- log('warn', 'hydration wait failed, continuing');
743
- });
1037
+ }, { maxIterations, hydrationPollMs }).catch(() => {
1038
+ log('warn', 'hydration wait failed, continuing');
1039
+ });
1040
+ }
744
1041
 
745
- await page.waitForTimeout(200);
1042
+ if (settleMs > 0) {
1043
+ await page.waitForTimeout(settleMs);
1044
+ }
746
1045
 
747
1046
  // Auto-dismiss common consent/privacy dialogs
748
1047
  await dismissConsentDialogs(page);
@@ -809,6 +1108,25 @@ function isGoogleSerp(url) {
809
1108
  }
810
1109
  }
811
1110
 
/**
 * Report whether a URL points at a Google Search results path.
 *
 * The hostname must contain `google` as a complete DNS label followed by a
 * suffix (e.g. `google.com`, `www.google.co.uk`), and the path must be exactly
 * `/search`. Requiring a label boundary stops look-alike hosts such as
 * `notgoogle.com` from matching. The public suffix is not validated.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} True for a Google `/search` URL; false otherwise,
 *   including when `url` cannot be parsed.
 */
function isGoogleSearchUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  // URL normalizes the hostname to lowercase, so plain comparisons suffice.
  const host = parsed.hostname;
  const isGoogleHost = host.startsWith('google.') || host.includes('.google.');
  return isGoogleHost && parsed.pathname === '/search';
}
1119
+
/**
 * Detect whether a page is showing Google's block / unusual-traffic screen.
 *
 * A URL containing `google.com/sorry/` is treated as blocked outright.
 * Otherwise up to the first 600 characters of the body text are checked for
 * known block-page phrases. A missing or closed page, or a failed evaluation,
 * yields `false`.
 *
 * @param {object} page - Page exposing `isClosed()`, `url()` and `evaluate()`.
 * @returns {Promise<boolean>} True when the page looks blocked.
 */
async function isGoogleSearchBlocked(page) {
  const unusable = !page || page.isClosed();
  if (unusable) {
    return false;
  }

  // The interstitial is recognisable from the URL alone.
  const currentUrl = page.url();
  if (currentUrl.includes('google.com/sorry/')) {
    return true;
  }

  const blockMarkers = /Our systems have detected unusual traffic|About this page|If you're having trouble accessing Google Search|SG_REL/;
  const leadingText = await page
    .evaluate(() => document.body?.innerText?.slice(0, 600) || '')
    .catch(() => '');
  return blockMarkers.test(leadingText);
}
1129
+
812
1130
  // --- Google SERP: combined extraction (refs + snapshot in one DOM pass) ---
813
1131
  // Returns { refs: Map, snapshot: string }
814
1132
  async function extractGoogleSerp(page) {
@@ -949,6 +1267,8 @@ async function extractGoogleSerp(page) {
949
1267
  return { refs, snapshot: extracted.snapshot };
950
1268
  }
951
1269
 
1270
+ const REFRESH_READY_TIMEOUT_MS = 2500;
1271
+
952
1272
  async function buildRefs(page) {
953
1273
  const refs = new Map();
954
1274
 
@@ -967,16 +1287,20 @@ async function buildRefs(page) {
967
1287
  const start = Date.now();
968
1288
 
969
1289
  // Hard total timeout on the entire buildRefs operation
970
- const timeoutPromise = new Promise((_, reject) =>
971
- setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS)
972
- );
1290
+ let timerId;
1291
+ const timeoutPromise = new Promise((_, reject) => {
1292
+ timerId = setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS);
1293
+ });
973
1294
 
974
1295
  try {
975
- return await Promise.race([
1296
+ const result = await Promise.race([
976
1297
  _buildRefsInner(page, refs, start),
977
1298
  timeoutPromise
978
1299
  ]);
1300
+ clearTimeout(timerId);
1301
+ return result;
979
1302
  } catch (err) {
1303
+ clearTimeout(timerId);
980
1304
  if (err.message === 'buildRefs_timeout') {
981
1305
  log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
982
1306
  return refs;
@@ -986,7 +1310,12 @@ async function buildRefs(page) {
986
1310
  }
987
1311
 
988
1312
  async function _buildRefsInner(page, refs, start) {
989
- await waitForPageReady(page, { waitForNetwork: false });
1313
+ await waitForPageReady(page, {
1314
+ timeout: REFRESH_READY_TIMEOUT_MS,
1315
+ waitForNetwork: false,
1316
+ waitForHydration: false,
1317
+ settleMs: 100,
1318
+ });
990
1319
 
991
1320
  // Budget remaining time for ariaSnapshot
992
1321
  const elapsed = Date.now() - start;
@@ -1055,7 +1384,12 @@ async function getAriaSnapshot(page) {
1055
1384
  if (!page || page.isClosed()) {
1056
1385
  return null;
1057
1386
  }
1058
- await waitForPageReady(page, { waitForNetwork: false });
1387
+ await waitForPageReady(page, {
1388
+ timeout: REFRESH_READY_TIMEOUT_MS,
1389
+ waitForNetwork: false,
1390
+ waitForHydration: false,
1391
+ settleMs: 100,
1392
+ });
1059
1393
  try {
1060
1394
  return await page.locator('body').ariaSnapshot({ timeout: 5000 });
1061
1395
  } catch (err) {
@@ -1078,11 +1412,46 @@ function refToLocator(page, ref, refs) {
1078
1412
  return locator;
1079
1413
  }
1080
1414
 
/**
 * Rebuild the element refs for a tab, optionally under a timeout.
 *
 * Calls buildRefs() on the tab's page. When `timeoutMs` is truthy the rebuild
 * is raced against a timer that rejects with `<reason>_refs_timeout`; the
 * timer is always cleared once the race settles so it cannot linger after the
 * rebuild has finished. If the rebuild yields no refs while the tab already
 * had some and the page URL did not change, the previous refs are returned
 * instead (unless `preserveExistingOnEmpty` is false).
 *
 * @param {object} tabState - Tab state with `page` and, optionally, a `refs` Map.
 * @param {object} [options]
 * @param {string} [options.reason='refresh'] - Used in the timeout error
 *   message and in the log entry.
 * @param {number|null} [options.timeoutMs=null] - Timeout in milliseconds; a
 *   falsy value disables the timeout.
 * @param {boolean} [options.preserveExistingOnEmpty=true] - Keep the previous
 *   refs when the rebuild comes back empty on the same URL.
 * @returns {Promise<Map>} The refreshed refs, or the preserved previous refs.
 * @throws {Error} `<reason>_refs_timeout` when the timeout elapses first, or
 *   whatever buildRefs() rejects with.
 */
async function refreshTabRefs(tabState, options = {}) {
  const {
    reason = 'refresh',
    timeoutMs = null,
    preserveExistingOnEmpty = true,
  } = options;

  const beforeUrl = tabState.page?.url?.() || '';
  const existingRefs = tabState.refs instanceof Map ? tabState.refs : new Map();
  const refreshPromise = buildRefs(tabState.page);

  let refreshedRefs;
  if (timeoutMs) {
    const timeoutLabel = `${reason}_refs_timeout`;
    let timerId;
    const timeoutPromise = new Promise((_, reject) => {
      timerId = setTimeout(() => reject(new Error(timeoutLabel)), timeoutMs);
    });
    try {
      refreshedRefs = await Promise.race([refreshPromise, timeoutPromise]);
    } finally {
      // Release the timer whichever side of the race won.
      clearTimeout(timerId);
    }
  } else {
    refreshedRefs = await refreshPromise;
  }

  const afterUrl = tabState.page?.url?.() || beforeUrl;
  if (preserveExistingOnEmpty && refreshedRefs.size === 0 && existingRefs.size > 0 && beforeUrl === afterUrl) {
    log('warn', 'preserving previous refs after empty rebuild', {
      reason,
      url: afterUrl,
      previousRefs: existingRefs.size,
    });
    return existingRefs;
  }

  return refreshedRefs;
}
1449
+
1081
1450
  // --- YouTube transcript ---
1082
1451
  // Implementation extracted to lib/youtube.js to avoid scanner false positives
1083
1452
  // (child_process + app.post in same file triggers OpenClaw skill-scanner)
1084
1453
 
1085
- detectYtDlp(log);
1454
+ await detectYtDlp(log);
1086
1455
 
1087
1456
  app.post('/youtube/transcript', async (req, res) => {
1088
1457
  const reqId = req.reqId;
@@ -1102,14 +1471,23 @@ app.post('/youtube/transcript', async (req, res) => {
1102
1471
  const videoId = videoIdMatch[1];
1103
1472
  const lang = languages[0] || 'en';
1104
1473
 
1105
- log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser' });
1474
+ // Re-detect yt-dlp if startup detection failed (transient issue)
1475
+ await ensureYtDlp(log);
1476
+
1477
+ const ytDlpProxyUrl = buildProxyUrl(proxyPool, CONFIG.proxy);
1478
+ log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser', hasProxy: !!ytDlpProxyUrl });
1106
1479
 
1107
1480
  let result;
1108
1481
  if (hasYtDlp()) {
1109
1482
  try {
1110
- result = await ytDlpTranscript(reqId, url, videoId, lang);
1483
+ result = await ytDlpTranscript(reqId, url, videoId, lang, ytDlpProxyUrl);
1111
1484
  } catch (ytErr) {
1112
- log('warn', 'yt-dlp failed, falling back to browser', { reqId, error: ytErr.message });
1485
+ log('warn', 'yt-dlp threw, falling back to browser', { reqId, error: ytErr.message });
1486
+ result = null;
1487
+ }
1488
+ // If yt-dlp returned an error result (e.g. no captions) or threw, try browser
1489
+ if (!result || result.status !== 'ok') {
1490
+ if (result) log('warn', 'yt-dlp returned error, falling back to browser', { reqId, status: result.status, code: result.code });
1113
1491
  result = await browserTranscript(reqId, url, videoId, lang);
1114
1492
  }
1115
1493
  } else {
@@ -1119,6 +1497,7 @@ app.post('/youtube/transcript', async (req, res) => {
1119
1497
  log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
1120
1498
  res.json(result);
1121
1499
  } catch (err) {
1500
+ failuresTotal.labels(classifyError(err), 'youtube_transcript').inc();
1122
1501
  log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
1123
1502
  res.status(500).json({ error: safeError(err) });
1124
1503
  }
@@ -1237,6 +1616,16 @@ async function browserTranscript(reqId, url, videoId, lang) {
1237
1616
  };
1238
1617
  } finally {
1239
1618
  await safePageClose(page);
1619
+ // Clean up phantom transcript session if no tabs remain
1620
+ const ytSession = sessions.get(normalizeUserId('__yt_transcript__'));
1621
+ if (ytSession) {
1622
+ let totalTabs = 0;
1623
+ for (const g of ytSession.tabGroups.values()) totalTabs += g.size;
1624
+ if (totalTabs === 0) {
1625
+ ytSession.context.close().catch(() => {});
1626
+ sessions.delete(normalizeUserId('__yt_transcript__'));
1627
+ }
1628
+ }
1240
1629
  }
1241
1630
  });
1242
1631
  }
@@ -1246,19 +1635,37 @@ app.get('/health', (req, res) => {
1246
1635
  return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
1247
1636
  }
1248
1637
  const running = browser !== null && (browser.isConnected?.() ?? false);
1638
+ if (proxyPool?.canRotateSessions && !running) {
1639
+ scheduleBrowserWarmRetry();
1640
+ return res.status(503).json({
1641
+ ok: false,
1642
+ engine: 'camoufox',
1643
+ browserConnected: false,
1644
+ browserRunning: false,
1645
+ warming: true,
1646
+ ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1647
+ });
1648
+ }
1249
1649
  res.json({
1250
1650
  ok: true,
1251
1651
  engine: 'camoufox',
1252
1652
  browserConnected: running,
1253
1653
  browserRunning: running,
1254
1654
  activeTabs: getTotalTabCount(),
1655
+ activeSessions: sessions.size,
1255
1656
  consecutiveFailures: healthState.consecutiveNavFailures,
1657
+ ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1256
1658
  });
1257
1659
  });
1258
1660
 
1259
1661
  app.get('/metrics', async (_req, res) => {
1260
- res.set('Content-Type', metricsRegister.contentType);
1261
- res.send(await metricsRegister.metrics());
1662
+ const reg = getRegister();
1663
+ if (!reg) {
1664
+ res.status(404).json({ error: 'Prometheus metrics disabled. Set PROMETHEUS_ENABLED=1 to enable.' });
1665
+ return;
1666
+ }
1667
+ res.set('Content-Type', reg.contentType);
1668
+ res.send(await reg.metrics());
1262
1669
  });
1263
1670
 
1264
1671
  // Create new tab
@@ -1276,18 +1683,19 @@ app.post('/tabs', async (req, res) => {
1276
1683
 
1277
1684
  let totalTabs = 0;
1278
1685
  for (const group of session.tabGroups.values()) totalTabs += group.size;
1279
- if (totalTabs >= MAX_TABS_PER_SESSION) {
1280
- throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1281
- }
1282
1686
 
1283
- if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
1284
- throw Object.assign(new Error('Maximum global tabs reached'), { statusCode: 429 });
1687
+ // Recycle oldest tab when limits are reached instead of rejecting
1688
+ if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
1689
+ const recycled = await recycleOldestTab(session, req.reqId);
1690
+ if (!recycled) {
1691
+ throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1692
+ }
1285
1693
  }
1286
1694
 
1287
1695
  const group = getTabGroup(session, resolvedSessionKey);
1288
1696
 
1289
1697
  const page = await session.context.newPage();
1290
- const tabId = crypto.randomUUID();
1698
+ const tabId = fly.makeTabId();
1291
1699
  const tabState = createTabState(page);
1292
1700
  attachDownloadListener(tabState, tabId);
1293
1701
  group.set(tabId, tabState);
@@ -1296,13 +1704,14 @@ app.post('/tabs', async (req, res) => {
1296
1704
  if (url) {
1297
1705
  const urlErr = validateUrl(url);
1298
1706
  if (urlErr) throw Object.assign(new Error(urlErr), { statusCode: 400 });
1707
+ tabState.lastRequestedUrl = url;
1299
1708
  await withPageLoadDuration('open_url', () => page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1300
1709
  tabState.visitedUrls.add(url);
1301
1710
  }
1302
1711
 
1303
1712
  log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
1304
1713
  return { tabId, url: page.url() };
1305
- })(), HANDLER_TIMEOUT_MS, 'tab create');
1714
+ })(), requestTimeoutMs(), 'tab create');
1306
1715
 
1307
1716
  res.json(result);
1308
1717
  } catch (err) {
@@ -1321,40 +1730,23 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1321
1730
 
1322
1731
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1323
1732
  await ensureBrowser();
1733
+ const resolvedSessionKey = sessionKey || listItemId || 'default';
1324
1734
  let session = sessions.get(normalizeUserId(userId));
1325
1735
  let found = session && findTab(session, tabId);
1326
1736
 
1327
1737
  let tabState;
1328
1738
  if (!found) {
1329
- const resolvedSessionKey = sessionKey || listItemId || 'default';
1330
1739
  session = await getSession(userId);
1331
1740
  let sessionTabs = 0;
1332
1741
  for (const g of session.tabGroups.values()) sessionTabs += g.size;
1333
1742
  if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
1334
- // Reuse oldest tab in session instead of rejecting
1335
- let oldestTab = null;
1336
- let oldestGroup = null;
1337
- let oldestTabId = null;
1338
- for (const [gKey, group] of session.tabGroups) {
1339
- for (const [tid, ts] of group) {
1340
- if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
1341
- oldestTab = ts;
1342
- oldestGroup = group;
1343
- oldestTabId = tid;
1344
- }
1345
- }
1346
- }
1347
- if (oldestTab) {
1348
- tabState = oldestTab;
1349
- const group = getTabGroup(session, resolvedSessionKey);
1350
- if (oldestGroup) oldestGroup.delete(oldestTabId);
1351
- group.set(tabId, tabState);
1352
- { const _l = tabLocks.get(oldestTabId); if (_l) _l.drain(); tabLocks.delete(oldestTabId); }
1353
- log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
1354
- } else {
1743
+ // Recycle oldest tab to free a slot, then create new page
1744
+ const recycled = await recycleOldestTab(session, req.reqId);
1745
+ if (!recycled) {
1355
1746
  throw new Error('Maximum tabs per session reached');
1356
1747
  }
1357
- } else {
1748
+ }
1749
+ {
1358
1750
  const page = await session.context.newPage();
1359
1751
  tabState = createTabState(page);
1360
1752
  attachDownloadListener(tabState, tabId, log);
@@ -1379,9 +1771,61 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1379
1771
  if (urlErr) throw new Error(urlErr);
1380
1772
 
1381
1773
  return await withTabLock(tabId, async () => {
1382
- await withPageLoadDuration('navigate', () => tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1383
- tabState.visitedUrls.add(targetUrl);
1384
- tabState.lastSnapshot = null;
1774
+ const currentSessionKey = found?.listItemId || resolvedSessionKey;
1775
+ const isGoogleSearch = isGoogleSearchUrl(targetUrl);
1776
+
1777
+ const navigateCurrentPage = async () => {
1778
+ tabState.lastRequestedUrl = targetUrl;
1779
+ await withPageLoadDuration('navigate', () => tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1780
+ tabState.visitedUrls.add(targetUrl);
1781
+ tabState.lastSnapshot = null;
1782
+ };
1783
+
1784
+ const prewarmGoogleHome = async () => {
1785
+ if (!isGoogleSearch || tabState.visitedUrls.has('https://www.google.com/')) return;
1786
+ await withPageLoadDuration('navigate', () => tabState.page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }));
1787
+ tabState.visitedUrls.add('https://www.google.com/');
1788
+ await tabState.page.waitForTimeout(1200);
1789
+ };
1790
+
1791
+ const recreateTabOnFreshContext = async () => {
1792
+ const previousRetryCount = tabState.googleRetryCount || 0;
1793
+ browserRestartsTotal.labels('google_search_block').inc();
1794
+ // Rotate at context level — destroy this user's session and create
1795
+ // a fresh one with a new proxy session. Does NOT restart the browser.
1796
+ const key = normalizeUserId(userId);
1797
+ const oldSession = sessions.get(key);
1798
+ if (oldSession) {
1799
+ await oldSession.context.close().catch(() => {});
1800
+ sessions.delete(key);
1801
+ }
1802
+ session = await getSession(userId);
1803
+ const group = getTabGroup(session, currentSessionKey);
1804
+ const page = await session.context.newPage();
1805
+ tabState = createTabState(page);
1806
+ tabState.googleRetryCount = previousRetryCount + 1;
1807
+ attachDownloadListener(tabState, tabId, log);
1808
+ group.set(tabId, tabState);
1809
+ refreshActiveTabsGauge();
1810
+ };
1811
+
1812
+ if (isGoogleSearch && proxyPool?.canRotateSessions) {
1813
+ await prewarmGoogleHome();
1814
+ }
1815
+
1816
+ await navigateCurrentPage();
1817
+
1818
+ if (isGoogleSearch && proxyPool?.canRotateSessions && await isGoogleSearchBlocked(tabState.page)) {
1819
+ log('warn', 'google search blocked, rotating browser proxy session', {
1820
+ reqId: req.reqId,
1821
+ tabId,
1822
+ url: tabState.page.url(),
1823
+ proxySession: browserLaunchProxy?.sessionId || null,
1824
+ });
1825
+ await recreateTabOnFreshContext();
1826
+ await prewarmGoogleHome();
1827
+ await navigateCurrentPage();
1828
+ }
1385
1829
 
1386
1830
  // For Google SERP: skip eager ref building during navigate.
1387
1831
  // Results render asynchronously after DOMContentLoaded — the snapshot
@@ -1390,11 +1834,15 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1390
1834
  tabState.refs = new Map();
1391
1835
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: false, googleSerp: true };
1392
1836
  }
1837
+
1838
+ if (isGoogleSearch && await isGoogleSearchBlocked(tabState.page)) {
1839
+ return { ok: false, tabId, url: tabState.page.url(), refsAvailable: false, googleBlocked: true };
1840
+ }
1393
1841
 
1394
1842
  tabState.refs = await buildRefs(tabState.page);
1395
1843
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
1396
- });
1397
- })(), HANDLER_TIMEOUT_MS, 'navigate'));
1844
+ }, requestTimeoutMs());
1845
+ })(), requestTimeoutMs(), 'navigate'));
1398
1846
 
1399
1847
  log('info', 'navigated', { reqId: req.reqId, tabId, url: result.url });
1400
1848
  res.json(result);
@@ -1435,6 +1883,25 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1435
1883
  }
1436
1884
 
1437
1885
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1886
+ if (proxyPool?.canRotateSessions && isGoogleSearchUrl(tabState.lastRequestedUrl || '')) {
1887
+ const blocked = await isGoogleSearchBlocked(tabState.page);
1888
+ const unavailable = !blocked && await isGoogleUnavailable(tabState.page);
1889
+ if (blocked || unavailable) {
1890
+ const rotated = await rotateGoogleTab(userId, found.listItemId, req.params.tabId, tabState, blocked ? 'google_search_block_snapshot' : 'google_search_unavailable_snapshot', req.reqId);
1891
+ if (rotated) {
1892
+ tabState.page = rotated.tabState.page;
1893
+ tabState.refs = rotated.tabState.refs;
1894
+ tabState.visitedUrls = rotated.tabState.visitedUrls;
1895
+ tabState.downloads = rotated.tabState.downloads;
1896
+ tabState.toolCalls = rotated.tabState.toolCalls;
1897
+ tabState.consecutiveTimeouts = rotated.tabState.consecutiveTimeouts;
1898
+ tabState.lastSnapshot = rotated.tabState.lastSnapshot;
1899
+ tabState.lastRequestedUrl = rotated.tabState.lastRequestedUrl;
1900
+ tabState.googleRetryCount = rotated.tabState.googleRetryCount;
1901
+ }
1902
+ }
1903
+ }
1904
+
1438
1905
  const pageUrl = tabState.page.url();
1439
1906
 
1440
1907
  // Google SERP fast path — DOM extraction instead of ariaSnapshot
@@ -1460,7 +1927,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1460
1927
  return response;
1461
1928
  }
1462
1929
 
1463
- tabState.refs = await buildRefs(tabState.page);
1930
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'snapshot' });
1464
1931
  const ariaYaml = await getAriaSnapshot(tabState.page);
1465
1932
 
1466
1933
  let annotatedYaml = ariaYaml || '';
@@ -1516,7 +1983,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1516
1983
  }
1517
1984
 
1518
1985
  return response;
1519
- })(), HANDLER_TIMEOUT_MS, 'snapshot'));
1986
+ })(), requestTimeoutMs(), 'snapshot'));
1520
1987
 
1521
1988
  log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
1522
1989
  res.json(result);
@@ -1634,9 +2101,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1634
2101
  log('info', 'auto-refreshing refs before click', { ref, hadRefs: tabState.refs.size });
1635
2102
  try {
1636
2103
  const preClickBudget = Math.min(4000, remainingBudget());
1637
- const refreshPromise = buildRefs(tabState.page);
1638
- const refreshBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('pre_click_refs_timeout')), preClickBudget));
1639
- tabState.refs = await Promise.race([refreshPromise, refreshBudget]);
2104
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'pre_click', timeoutMs: preClickBudget });
1640
2105
  } catch (e) {
1641
2106
  if (e.message === 'pre_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1642
2107
  log('warn', 'pre-click buildRefs timed out, proceeding without refresh');
@@ -1676,9 +2141,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1676
2141
  // If it times out, return without refs (caller's next /snapshot will rebuild them).
1677
2142
  const postClickBudget = Math.max(2000, remainingBudget());
1678
2143
  try {
1679
- const refsPromise = buildRefs(tabState.page);
1680
- const refsBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('post_click_refs_timeout')), postClickBudget));
1681
- tabState.refs = await Promise.race([refsPromise, refsBudget]);
2144
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'post_click', timeoutMs: postClickBudget });
1682
2145
  } catch (e) {
1683
2146
  if (e.message === 'post_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1684
2147
  log('warn', 'post-click buildRefs timed out, returning without refs', { budget: postClickBudget, elapsed: Date.now() - clickStart });
@@ -1702,7 +2165,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1702
2165
  const session = sessions.get(normalizeUserId(req.body.userId));
1703
2166
  const found = session && findTab(session, tabId);
1704
2167
  if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1705
- found.tabState.refs = await buildRefs(found.tabState.page);
2168
+ found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'click_timeout' });
1706
2169
  found.tabState.lastSnapshot = null;
1707
2170
  return res.status(500).json({
1708
2171
  error: safeError(err),
@@ -1741,7 +2204,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1741
2204
  let locator = refToLocator(tabState.page, ref, tabState.refs);
1742
2205
  if (!locator) {
1743
2206
  log('info', 'auto-refreshing refs before fill', { ref, hadRefs: tabState.refs.size });
1744
- tabState.refs = await buildRefs(tabState.page);
2207
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'type' });
1745
2208
  locator = refToLocator(tabState.page, ref, tabState.refs);
1746
2209
  }
1747
2210
  if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
@@ -1759,7 +2222,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1759
2222
  const session = sessions.get(normalizeUserId(req.body.userId));
1760
2223
  const found = session && findTab(session, tabId);
1761
2224
  if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1762
- found.tabState.refs = await buildRefs(found.tabState.page);
2225
+ found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'type_timeout' });
1763
2226
  found.tabState.lastSnapshot = null;
1764
2227
  return res.status(500).json({
1765
2228
  error: safeError(err),
@@ -1811,8 +2274,9 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
1811
2274
  const { tabState } = found;
1812
2275
  tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1813
2276
 
1814
- const delta = direction === 'up' ? -amount : amount;
1815
- await tabState.page.mouse.wheel(0, delta);
2277
+ const isVertical = direction === 'up' || direction === 'down';
2278
+ const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
2279
+ await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
1816
2280
  await tabState.page.waitForTimeout(300);
1817
2281
 
1818
2282
  res.json({ ok: true });
@@ -1974,6 +2438,7 @@ app.get('/tabs/:tabId/downloads', async (req, res) => {
1974
2438
 
1975
2439
  res.json({ tabId: req.params.tabId, downloads });
1976
2440
  } catch (err) {
2441
+ failuresTotal.labels(classifyError(err), 'downloads').inc();
1977
2442
  log('error', 'downloads failed', { reqId: req.reqId, error: err.message });
1978
2443
  res.status(500).json({ error: safeError(err) });
1979
2444
  }
@@ -1999,6 +2464,7 @@ app.get('/tabs/:tabId/images', async (req, res) => {
1999
2464
 
2000
2465
  res.json({ tabId: req.params.tabId, images });
2001
2466
  } catch (err) {
2467
+ failuresTotal.labels(classifyError(err), 'images').inc();
2002
2468
  log('error', 'images failed', { reqId: req.reqId, error: err.message });
2003
2469
  res.status(500).json({ error: safeError(err) });
2004
2470
  }
@@ -2067,6 +2533,7 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
2067
2533
  log('info', 'evaluate', { reqId: req.reqId, tabId: req.params.tabId, userId, resultType: typeof result });
2068
2534
  res.json({ ok: true, result });
2069
2535
  } catch (err) {
2536
+ failuresTotal.labels(classifyError(err), 'evaluate').inc();
2070
2537
  log('error', 'evaluate failed', { reqId: req.reqId, error: err.message });
2071
2538
  res.status(500).json({ error: safeError(err) });
2072
2539
  }
@@ -2075,7 +2542,8 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
2075
2542
  // Close tab
2076
2543
  app.delete('/tabs/:tabId', async (req, res) => {
2077
2544
  try {
2078
- const { userId } = req.body;
2545
+ const userId = req.query.userId || req.body?.userId;
2546
+ if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
2079
2547
  const session = sessions.get(normalizeUserId(userId));
2080
2548
  const found = session && findTab(session, req.params.tabId);
2081
2549
  if (found) {
@@ -2099,7 +2567,8 @@ app.delete('/tabs/:tabId', async (req, res) => {
2099
2567
  // Close tab group
2100
2568
  app.delete('/tabs/group/:listItemId', async (req, res) => {
2101
2569
  try {
2102
- const { userId } = req.body;
2570
+ const userId = req.query.userId || req.body?.userId;
2571
+ if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
2103
2572
  const session = sessions.get(normalizeUserId(userId));
2104
2573
  const group = session?.tabGroups.get(req.params.listItemId);
2105
2574
  if (group) {
@@ -2160,6 +2629,7 @@ setInterval(() => {
2160
2629
  const now = Date.now();
2161
2630
  for (const [userId, session] of sessions) {
2162
2631
  if (now - session.lastAccess > SESSION_TIMEOUT_MS) {
2632
+ sessionsExpiredTotal.inc();
2163
2633
  clearSessionDownloads(session).catch(() => {});
2164
2634
  session.context.close().catch(() => {});
2165
2635
  sessions.delete(userId);
@@ -2188,6 +2658,7 @@ setInterval(() => {
2188
2658
  if (tabState.toolCalls === tabState._lastReaperToolCalls) {
2189
2659
  const idleMs = now - tabState._lastReaperCheck;
2190
2660
  if (idleMs >= TAB_INACTIVITY_MS) {
2661
+ tabsReapedTotal.inc();
2191
2662
  log('info', 'tab reaped (inactive)', { userId, tabId, listItemId, idleMs, toolCalls: tabState.toolCalls });
2192
2663
  safePageClose(tabState.page);
2193
2664
  group.delete(tabId);
@@ -2271,21 +2742,20 @@ app.post('/tabs/open', async (req, res) => {
2271
2742
 
2272
2743
  const session = await getSession(userId);
2273
2744
 
2274
- // Check global tab limit first
2275
- if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
2276
- return res.status(429).json({ error: 'Maximum global tabs reached' });
2277
- }
2278
-
2745
+ // Recycle oldest tab when limits are reached instead of rejecting
2279
2746
  let totalTabs = 0;
2280
2747
  for (const g of session.tabGroups.values()) totalTabs += g.size;
2281
- if (totalTabs >= MAX_TABS_PER_SESSION) {
2282
- return res.status(429).json({ error: 'Maximum tabs per session reached' });
2748
+ if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
2749
+ const recycled = await recycleOldestTab(session, req.reqId);
2750
+ if (!recycled) {
2751
+ return res.status(429).json({ error: 'Maximum tabs per session reached' });
2752
+ }
2283
2753
  }
2284
2754
 
2285
2755
  const group = getTabGroup(session, listItemId);
2286
2756
 
2287
2757
  const page = await session.context.newPage();
2288
- const tabId = crypto.randomUUID();
2758
+ const tabId = fly.makeTabId();
2289
2759
  const tabState = createTabState(page);
2290
2760
  attachDownloadListener(tabState, tabId, log);
2291
2761
  group.set(tabId, tabState);
@@ -2314,6 +2784,7 @@ app.post('/start', async (req, res) => {
2314
2784
  await ensureBrowser();
2315
2785
  res.json({ ok: true, profile: 'camoufox' });
2316
2786
  } catch (err) {
2787
+ failuresTotal.labels('browser_launch', 'start').inc();
2317
2788
  res.status(500).json({ ok: false, error: safeError(err) });
2318
2789
  }
2319
2790
  });
@@ -2616,8 +3087,9 @@ app.post('/act', async (req, res) => {
2616
3087
  if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
2617
3088
  await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
2618
3089
  } else {
2619
- const delta = direction === 'up' ? -amount : amount;
2620
- await tabState.page.mouse.wheel(0, delta);
3090
+ const isVertical = direction === 'up' || direction === 'down';
3091
+ const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
3092
+ await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
2621
3093
  }
2622
3094
  await tabState.page.waitForTimeout(300);
2623
3095
  return { ok: true, targetId };
@@ -2717,6 +3189,7 @@ setInterval(async () => {
2717
3189
  await testContext.close();
2718
3190
  healthState.lastSuccessfulNav = Date.now();
2719
3191
  } catch (err) {
3192
+ failuresTotal.labels('health_probe', 'internal').inc();
2720
3193
  log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
2721
3194
  if (testContext) await testContext.close().catch(() => {});
2722
3195
  restartBrowser('health probe failed').catch(() => {});
@@ -2759,12 +3232,21 @@ async function gracefulShutdown(signal) {
2759
3232
  process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
2760
3233
  process.on('SIGINT', () => gracefulShutdown('SIGINT'));
2761
3234
 
3235
+ // Idle self-shutdown REMOVED — it was racing with min_machines_running=2
3236
+ // and stopping machines that Fly couldn't auto-restart fast enough, leaving
3237
+ // only 1 machine to handle all browser traffic (causing timeouts for users).
3238
+ // Fly's auto_stop_machines=false + min_machines_running=2 handles scaling.
3239
+
2762
3240
  const PORT = CONFIG.port;
2763
3241
  const server = app.listen(PORT, async () => {
2764
3242
  startMemoryReporter();
2765
3243
  refreshActiveTabsGauge();
2766
3244
  refreshTabLockQueueDepth();
2767
- log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
3245
+ if (FLY_MACHINE_ID) {
3246
+ log('info', 'server started (fly)', { port: PORT, pid: process.pid, machineId: FLY_MACHINE_ID, nodeVersion: process.version });
3247
+ } else {
3248
+ log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
3249
+ }
2768
3250
  // Pre-warm browser so first request doesn't eat a 6-7s cold start
2769
3251
  try {
2770
3252
  const start = Date.now();
@@ -2772,8 +3254,10 @@ const server = app.listen(PORT, async () => {
2772
3254
  log('info', 'browser pre-warmed', { ms: Date.now() - start });
2773
3255
  scheduleBrowserIdleShutdown();
2774
3256
  } catch (err) {
2775
- log('error', 'browser pre-warm failed (will retry on first request)', { error: err.message });
3257
+ log('error', 'browser pre-warm failed (will retry in background)', { error: err.message });
3258
+ scheduleBrowserWarmRetry();
2776
3259
  }
3260
+ // Idle self-shutdown removed — Fly manages machine lifecycle via fly.toml.
2777
3261
  });
2778
3262
 
2779
3263
  server.on('error', (err) => {