@askjo/camofox-browser 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -1,11 +1,13 @@
1
1
  import { Camoufox, launchOptions } from 'camoufox-js';
2
+ import { VirtualDisplay } from 'camoufox-js/dist/virtdisplay.js';
2
3
  import { firefox } from 'playwright-core';
3
4
  import express from 'express';
4
5
  import crypto from 'crypto';
5
6
  import os from 'os';
6
7
  import { expandMacro } from './lib/macros.js';
7
8
  import { loadConfig } from './lib/config.js';
8
- import { normalizePlaywrightProxy } from './lib/proxy.js';
9
+ import { normalizePlaywrightProxy, createProxyPool, buildProxyUrl } from './lib/proxy.js';
10
+ import { createFlyHelpers } from './lib/fly.js';
9
11
  import { windowSnapshot } from './lib/snapshot.js';
10
12
  import {
11
13
  MAX_DOWNLOAD_INLINE_BYTES,
@@ -15,12 +17,14 @@ import {
15
17
  getDownloadsList,
16
18
  extractPageImages,
17
19
  } from './lib/downloads.js';
18
- import { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
20
+ import { detectYtDlp, hasYtDlp, ensureYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
19
21
  import {
20
22
  register as metricsRegister,
21
23
  requestsTotal, requestDuration, pageLoadDuration,
22
24
  activeTabsGauge, tabLockQueueDepth,
23
- tabLockTimeoutsTotal, startMemoryReporter, actionFromReq,
25
+ tabLockTimeoutsTotal, startMemoryReporter, stopMemoryReporter, actionFromReq,
26
+ failuresTotal, browserRestartsTotal, tabsDestroyedTotal,
27
+ sessionsExpiredTotal, tabsReapedTotal, tabsRecycledTotal, classifyError,
24
28
  } from './lib/metrics.js';
25
29
 
26
30
  const CONFIG = loadConfig();
@@ -75,6 +79,13 @@ app.use((req, res, next) => {
75
79
  next();
76
80
  });
77
81
 
82
+ // --- Horizontal scaling (Fly.io multi-machine) ---
83
+ const fly = createFlyHelpers(CONFIG);
84
+ const FLY_MACHINE_ID = fly.machineId;
85
+
86
+ // Route tab requests to the owning machine via fly-replay header.
87
+ app.use('/tabs/:tabId', fly.replayMiddleware(log));
88
+
78
89
  const ALLOWED_URL_SCHEMES = ['http:', 'https:'];
79
90
 
80
91
  // Interactive roles to include - exclude combobox to avoid opening complex widgets
@@ -224,6 +235,7 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
224
235
  log('info', 'cookies imported', { reqId: req.reqId, userId: String(userId), count: sanitized.length });
225
236
  res.json(result);
226
237
  } catch (err) {
238
+ failuresTotal.labels(classifyError(err), 'set_cookies').inc();
227
239
  log('error', 'cookie import failed', { reqId: req.reqId, error: err.message });
228
240
  res.status(500).json({ error: safeError(err) });
229
241
  }
@@ -250,6 +262,8 @@ const FAILURE_THRESHOLD = 3;
250
262
  const MAX_CONSECUTIVE_TIMEOUTS = 3;
251
263
  const TAB_LOCK_TIMEOUT_MS = 35000; // Must be > HANDLER_TIMEOUT_MS so active op times out first
252
264
 
265
+
266
+
253
267
  // Proper mutex for tab serialization. The old Promise-chain lock on timeout proceeded
254
268
  // WITHOUT the lock, allowing concurrent Playwright operations that corrupt CDP state.
255
269
  class TabLock {
@@ -329,6 +343,10 @@ function withTimeout(promise, ms, label) {
329
343
  ]);
330
344
  }
331
345
 
346
/**
 * Resolve the timeout to apply to a request handler.
 *
 * When the proxy pool reports that it can rotate sessions, the timeout is
 * raised to at least 180000 ms; otherwise the base value is returned as-is.
 *
 * @param {number} [baseMs=HANDLER_TIMEOUT_MS] - Baseline timeout in milliseconds.
 * @returns {number} Effective timeout in milliseconds.
 */
function requestTimeoutMs(baseMs = HANDLER_TIMEOUT_MS) {
  if (!proxyPool?.canRotateSessions) {
    return baseMs;
  }
  return Math.max(baseMs, 180000);
}
349
+
332
350
  const userConcurrency = new Map();
333
351
 
334
352
  async function withUserLimit(userId, operation) {
@@ -382,25 +400,27 @@ function getHostOS() {
382
400
  return 'linux';
383
401
  }
384
402
 
385
- function buildProxyConfig() {
386
- const { host, port, username, password } = CONFIG.proxy;
387
-
388
- if (!host || !port) {
389
- log('info', 'no proxy configured');
390
- return null;
391
- }
392
-
393
- log('info', 'proxy configured', { host, port });
394
- return {
395
- server: `http://${host}:${port}`,
396
- username,
397
- password,
398
- };
403
+ // Proxy strategy for outbound browsing.
404
+ const proxyPool = createProxyPool(CONFIG.proxy);
405
+
406
+ if (proxyPool) {
407
+ log('info', 'proxy pool created', {
408
+ mode: proxyPool.mode,
409
+ host: proxyPool.canRotateSessions ? CONFIG.proxy.backconnectHost : CONFIG.proxy.host,
410
+ ports: proxyPool.canRotateSessions ? [CONFIG.proxy.backconnectPort] : CONFIG.proxy.ports,
411
+ poolSize: proxyPool.size,
412
+ country: CONFIG.proxy.country || null,
413
+ state: CONFIG.proxy.state || null,
414
+ city: CONFIG.proxy.city || null,
415
+ });
416
+ } else {
417
+ log('info', 'no proxy configured');
399
418
  }
400
419
 
401
420
  const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
402
421
  let browserIdleTimer = null;
403
422
  let browserLaunchPromise = null;
423
+ let browserWarmRetryTimer = null;
404
424
 
405
425
  function scheduleBrowserIdleShutdown() {
406
426
  clearBrowserIdleTimer();
@@ -423,6 +443,21 @@ function clearBrowserIdleTimer() {
423
443
  }
424
444
  }
425
445
 
446
/**
 * Schedule a background attempt to bring the browser up.
 *
 * Does nothing when a retry is already pending, a browser exists, or a launch
 * is in flight. On failure the retry is rescheduled with the delay doubled,
 * capped at 30000 ms.
 *
 * @param {number} [delayMs=5000] - Delay before this attempt, in milliseconds.
 */
function scheduleBrowserWarmRetry(delayMs = 5000) {
  if (browserWarmRetryTimer || browser || browserLaunchPromise) return;
  browserWarmRetryTimer = setTimeout(async () => {
    // Clear the handle first so a failed attempt is allowed to reschedule itself.
    browserWarmRetryTimer = null;
    try {
      const start = Date.now();
      await ensureBrowser();
      log('info', 'background browser warm retry succeeded', { ms: Date.now() - start });
    } catch (err) {
      // Report the delay that will actually be used for the next attempt,
      // not the one that just elapsed.
      const nextDelayMs = Math.min(delayMs * 2, 30000);
      log('warn', 'background browser warm retry failed', { error: err.message, nextDelayMs });
      scheduleBrowserWarmRetry(nextDelayMs);
    }
  }, delayMs);
}
460
+
426
461
  // --- Browser health tracking ---
427
462
  const healthState = {
428
463
  consecutiveNavFailures: 0,
@@ -444,6 +479,7 @@ function recordNavFailure() {
444
479
  async function restartBrowser(reason) {
445
480
  if (healthState.isRecovering) return;
446
481
  healthState.isRecovering = true;
482
+ browserRestartsTotal.labels(reason).inc();
447
483
  log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });
448
484
  try {
449
485
  for (const [, session] of sessions) {
@@ -476,30 +512,157 @@ function getTotalTabCount() {
476
512
  return total;
477
513
  }
478
514
 
515
+ // Virtual display for WebGL support and anti-detection.
516
+ // Xvfb gives Firefox a real X display with GLX, enabling software-rendered WebGL
517
+ // via Mesa llvmpipe. Without this, WebGL returns "no context" — a massive bot signal.
518
+ let virtualDisplay = null;
519
+ let browserLaunchProxy = null;
520
+
521
/**
 * Check whether a freshly launched browser can reach a Google results page.
 *
 * Opens a throwaway context, visits the Google home page, then runs a fixed
 * search and inspects the resulting page. The context is always closed.
 *
 * @param {object} candidateBrowser - Browser exposing `newContext()`.
 * @returns {Promise<{ok: boolean, url: string, blocked: boolean}>}
 *   `ok` is true only when the page is not blocked and its URL is a Google SERP.
 */
async function probeGoogleSearch(candidateBrowser) {
  let probeContext = null;
  try {
    probeContext = await candidateBrowser.newContext({
      viewport: { width: 1280, height: 720 },
      permissions: ['geolocation'],
    });
    const probePage = await probeContext.newPage();
    const visit = (target) => probePage.goto(target, { waitUntil: 'domcontentloaded', timeout: 30000 });

    await visit('https://www.google.com/');
    await probePage.waitForTimeout(1200);
    await visit('https://www.google.com/search?q=weather%20today');
    await probePage.waitForTimeout(4000);

    const blocked = await isGoogleSearchBlocked(probePage);
    const ok = !blocked && isGoogleSerp(probePage.url());
    return { ok, url: probePage.url(), blocked };
  } finally {
    await probeContext?.close().catch(() => {});
  }
}
544
+
545
/**
 * Wrap `candidateBrowser.close` so closing the browser also releases the
 * resources tied to its launch.
 *
 * After the original close settles (whether it resolves or rejects) the
 * recorded launch proxy is cleared and the virtual display, if any, is killed.
 * Running the cleanup in `finally` keeps a failing close from leaking the
 * display process; the original rejection still propagates to the caller.
 *
 * @param {object} candidateBrowser - Browser whose `close` method is wrapped in place.
 * @param {object|null} localVirtualDisplay - Display owned by this browser, or null.
 */
function attachBrowserCleanup(candidateBrowser, localVirtualDisplay) {
  const origClose = candidateBrowser.close.bind(candidateBrowser);
  candidateBrowser.close = async (...args) => {
    try {
      await origClose(...args);
    } finally {
      browserLaunchProxy = null;
      if (localVirtualDisplay) {
        localVirtualDisplay.kill();
        // Only clear the module-level handle if it still points at this display.
        if (virtualDisplay === localVirtualDisplay) virtualDisplay = null;
      }
    }
  };
}
556
+
479
557
/**
 * Launch a Camoufox browser, retrying up to `proxyPool.launchRetries` times
 * (one attempt when no proxy pool is configured).
 *
 * Per attempt:
 *  1. pick a launch proxy from the pool (with a fresh session id when the pool
 *     can rotate sessions);
 *  2. on Linux, try to start a virtual display; if that fails the launch falls
 *     back to headless mode;
 *  3. launch Firefox with the generated options;
 *  4. when the pool can rotate sessions, probe Google search — a failed probe
 *     discards the browser and retries, except on the last attempt where the
 *     browser is accepted anyway.
 *
 * On success the module-level `browser`, `virtualDisplay` and
 * `browserLaunchProxy` are updated and close-time cleanup is attached.
 *
 * @returns {Promise<object>} The launched browser.
 * @throws The last launch error, or a generic error if no attempt was made.
 */
async function launchBrowserInstance() {
  const hostOS = getHostOS();
  const maxAttempts = proxyPool?.launchRetries ?? 1;
  let lastError = null;

  // Best-effort display teardown: a failing kill() must not abort the retry loop.
  const releaseDisplay = (display, attempt) => {
    if (!display) return;
    try {
      display.kill();
    } catch (err) {
      log('warn', 'failed to kill xvfb virtual display', { error: err.message, attempt });
    }
  };

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const launchProxy = proxyPool
      ? proxyPool.getLaunchProxy(proxyPool.canRotateSessions ? `browser-${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}` : undefined)
      : null;

    let localVirtualDisplay = null;
    let vdDisplay;
    let candidateBrowser = null;

    try {
      if (os.platform() === 'linux') {
        localVirtualDisplay = new VirtualDisplay();
        vdDisplay = localVirtualDisplay.get();
        log('info', 'xvfb virtual display started', { display: vdDisplay, attempt });
      }
    } catch (err) {
      log('warn', 'xvfb not available, falling back to headless', { error: err.message, attempt });
      // The display object may exist even though startup failed; release it and
      // make sure no stale display value leaks into the launch options.
      releaseDisplay(localVirtualDisplay, attempt);
      localVirtualDisplay = null;
      vdDisplay = undefined;
    }

    const useVirtualDisplay = !!vdDisplay;
    log('info', 'launching camoufox', {
      hostOS,
      attempt,
      maxAttempts,
      geoip: !!launchProxy,
      proxyMode: proxyPool?.mode || null,
      proxyServer: launchProxy?.server || null,
      proxySession: launchProxy?.sessionId || null,
      proxyPoolSize: proxyPool?.size || 0,
      virtualDisplay: useVirtualDisplay,
    });

    try {
      const options = await launchOptions({
        // Headed mode is only used when a virtual display is available.
        headless: !useVirtualDisplay,
        os: hostOS,
        humanize: true,
        enable_cache: true,
        proxy: launchProxy,
        geoip: !!launchProxy,
        virtual_display: vdDisplay,
      });
      options.proxy = normalizePlaywrightProxy(options.proxy);

      candidateBrowser = await firefox.launch(options);

      if (proxyPool?.canRotateSessions) {
        const probe = await probeGoogleSearch(candidateBrowser);
        if (!probe.ok) {
          log('warn', 'browser launch google probe failed', {
            attempt,
            maxAttempts,
            proxySession: launchProxy?.sessionId || null,
            url: probe.url,
          });
          if (attempt < maxAttempts) {
            await candidateBrowser.close().catch(() => {});
            releaseDisplay(localVirtualDisplay, attempt);
            continue;
          }
          // Last attempt: accept browser in degraded mode rather than death-spiraling.
          // Non-Google sites will still work; Google requests will get blocked responses.
          log('error', 'all proxy sessions Google-blocked, accepting browser in degraded mode', {
            maxAttempts,
            proxySession: launchProxy?.sessionId || null,
          });
        }
      }

      virtualDisplay = localVirtualDisplay;
      browserLaunchProxy = launchProxy;
      browser = candidateBrowser;
      attachBrowserCleanup(browser, localVirtualDisplay);

      log('info', 'camoufox launched', {
        attempt,
        maxAttempts,
        virtualDisplay: useVirtualDisplay,
        proxyMode: proxyPool?.mode || null,
        proxyServer: launchProxy?.server || null,
        proxySession: launchProxy?.sessionId || null,
      });
      return browser;
    } catch (err) {
      lastError = err;
      log('warn', 'camoufox launch attempt failed', {
        attempt,
        maxAttempts,
        error: err.message,
        proxySession: launchProxy?.sessionId || null,
      });
      await candidateBrowser?.close().catch(() => {});
      releaseDisplay(localVirtualDisplay, attempt);
    }
  }

  throw lastError || new Error('Failed to launch a usable browser');
}
499
661
 
500
662
  async function ensureBrowser() {
501
663
  clearBrowserIdleTimer();
502
664
  if (browser && !browser.isConnected()) {
665
+ failuresTotal.labels('browser_disconnected', 'internal').inc();
503
666
  log('warn', 'browser disconnected, clearing dead sessions and relaunching', {
504
667
  deadSessions: sessions.size,
505
668
  });
@@ -507,13 +670,20 @@ async function ensureBrowser() {
507
670
  await session.context.close().catch(() => {});
508
671
  }
509
672
  sessions.clear();
673
+ // Clean up virtual display from dead browser before relaunching
674
+ if (virtualDisplay) {
675
+ virtualDisplay.kill();
676
+ virtualDisplay = null;
677
+ }
678
+ browserLaunchProxy = null;
510
679
  browser = null;
511
680
  }
512
681
  if (browser) return browser;
513
682
  if (browserLaunchPromise) return browserLaunchPromise;
683
+ const launchTimeoutMs = proxyPool?.launchTimeoutMs ?? 60000;
514
684
  browserLaunchPromise = Promise.race([
515
685
  launchBrowserInstance(),
516
- new Promise((_, reject) => setTimeout(() => reject(new Error('Browser launch timeout (30s)')), 30000)),
686
+ new Promise((_, reject) => setTimeout(() => reject(new Error(`Browser launch timeout (${Math.round(launchTimeoutMs / 1000)}s)`)), launchTimeoutMs)),
517
687
  ]).finally(() => { browserLaunchPromise = null; });
518
688
  return browserLaunchPromise;
519
689
  }
@@ -556,11 +726,26 @@ async function getSession(userId) {
556
726
  contextOptions.timezoneId = 'America/Los_Angeles';
557
727
  contextOptions.geolocation = { latitude: 37.7749, longitude: -122.4194 };
558
728
  }
729
+ let sessionProxy = null;
730
+ if (proxyPool?.canRotateSessions) {
731
+ sessionProxy = proxyPool.getNext(`ctx-${key}-${crypto.randomUUID().replace(/-/g, '').slice(0, 8)}`);
732
+ contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
733
+ log('info', 'session proxy assigned', { userId: key, sessionId: sessionProxy.sessionId });
734
+ } else if (proxyPool) {
735
+ sessionProxy = proxyPool.getNext();
736
+ contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
737
+ log('info', 'session proxy assigned', { userId: key, proxy: sessionProxy.server });
738
+ }
559
739
  const context = await b.newContext(contextOptions);
560
740
 
561
- session = { context, tabGroups: new Map(), lastAccess: Date.now() };
741
+ session = { context, tabGroups: new Map(), lastAccess: Date.now(), proxySessionId: sessionProxy?.sessionId || null };
562
742
  sessions.set(key, session);
563
- log('info', 'session created', { userId: key });
743
+ log('info', 'session created', {
744
+ userId: key,
745
+ proxyMode: proxyPool?.mode || null,
746
+ proxyServer: sessionProxy?.server || browserLaunchProxy?.server || null,
747
+ proxySession: sessionProxy?.sessionId || browserLaunchProxy?.sessionId || null,
748
+ });
564
749
  }
565
750
  session.lastAccess = Date.now();
566
751
  return session;
@@ -599,11 +784,30 @@ function isTabDestroyedError(err) {
599
784
 
600
785
  // Centralized error handler for route catch blocks.
601
786
  // Auto-destroys dead browser sessions and returns appropriate status codes.
787
/**
 * Tell whether an error looks like a proxy connectivity failure, judged by
 * well-known substrings in its message.
 *
 * @param {{message?: string}|null|undefined} err - Error to inspect.
 * @returns {boolean} True when the message contains a proxy failure marker.
 */
function isProxyError(err) {
  if (!err) return false;
  const text = err.message || '';
  if (text.includes('NS_ERROR_PROXY')) return true;
  if (text.includes('proxy connection')) return true;
  return text.includes('Proxy connection');
}
792
+
602
793
  function handleRouteError(err, req, res, extraFields = {}) {
794
+ const failureType = classifyError(err);
795
+ const action = actionFromReq(req);
796
+ failuresTotal.labels(failureType, action).inc();
797
+
603
798
  const userId = req.body?.userId || req.query?.userId;
604
799
  if (userId && isDeadContextError(err)) {
605
800
  destroySession(userId);
606
801
  }
802
+ // Proxy errors mean the session is dead — rotate at context level.
803
+ // Destroy the user's session so the next request gets a fresh context with a new proxy.
804
+ if (isProxyError(err) && proxyPool?.canRotateSessions && userId) {
805
+ log('warn', 'proxy error detected, destroying user session for fresh proxy on next request', {
806
+ action, userId, error: err.message,
807
+ });
808
+ browserRestartsTotal.labels('proxy_error').inc();
809
+ destroySession(userId);
810
+ }
607
811
  // Track consecutive timeouts per tab and auto-destroy stuck tabs
608
812
  if (userId && isTimeoutError(err)) {
609
813
  const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
@@ -614,7 +818,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
614
818
  found.tabState.consecutiveTimeouts++;
615
819
  if (found.tabState.consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
616
820
  log('warn', 'auto-destroying tab after consecutive timeouts', { tabId, count: found.tabState.consecutiveTimeouts });
617
- destroyTab(session, tabId);
821
+ destroyTab(session, tabId, 'consecutive_timeouts');
618
822
  }
619
823
  }
620
824
  }
@@ -624,7 +828,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
624
828
  const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
625
829
  const session = sessions.get(normalizeUserId(userId));
626
830
  if (session && tabId) {
627
- destroyTab(session, tabId);
831
+ destroyTab(session, tabId, 'lock_queue');
628
832
  }
629
833
  return res.status(503).json({ error: 'Tab unresponsive and has been destroyed. Open a new tab.', ...extraFields });
630
834
  }
@@ -635,7 +839,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
635
839
  sendError(res, err, extraFields);
636
840
  }
637
841
 
638
- function destroyTab(session, tabId) {
842
+ function destroyTab(session, tabId, reason) {
639
843
  const lock = tabLocks.get(tabId);
640
844
  if (lock) {
641
845
  lock.drain();
@@ -645,17 +849,51 @@ function destroyTab(session, tabId) {
645
849
  for (const [listItemId, group] of session.tabGroups) {
646
850
  if (group.has(tabId)) {
647
851
  const tabState = group.get(tabId);
648
- log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls });
852
+ log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls, reason: reason || 'unknown' });
649
853
  safePageClose(tabState.page);
650
854
  group.delete(tabId);
651
855
  if (group.size === 0) session.tabGroups.delete(listItemId);
652
856
  refreshActiveTabsGauge();
857
+ if (reason) tabsDestroyedTotal.labels(reason).inc();
653
858
  return true;
654
859
  }
655
860
  }
656
861
  return false;
657
862
  }
658
863
 
864
/**
 * Recycle the least-used tab in a session (the one with the lowest
 * `toolCalls`; the first one encountered wins ties) to free a slot.
 *
 * The tab is unregistered from its group and its lock is drained BEFORE the
 * page close is awaited, so that concurrent requests cannot look up — or
 * recycle a second time — a tab that is already being torn down.
 *
 * @param {object} session - Session whose `tabGroups` map is searched.
 * @param {string} reqId - Request id, used for logging only.
 * @returns {Promise<{recycledTabId: string, recycledFromGroup: string}|null>}
 *   Details of the recycled tab, or null if the session has no tabs.
 */
async function recycleOldestTab(session, reqId) {
  let victim = null;
  for (const [groupKey, group] of session.tabGroups) {
    for (const [tabId, tabState] of group) {
      if (!victim || tabState.toolCalls < victim.tabState.toolCalls) {
        victim = { groupKey, group, tabId, tabState };
      }
    }
  }
  if (!victim) return null;

  const { groupKey, group, tabId, tabState } = victim;

  // Synchronous bookkeeping first; nothing may observe the tab once we yield.
  group.delete(tabId);
  if (group.size === 0) session.tabGroups.delete(groupKey);
  const lock = tabLocks.get(tabId);
  if (lock) {
    lock.drain();
    tabLocks.delete(tabId);
  }

  await safePageClose(tabState.page);

  refreshTabLockQueueDepth();
  // Keep the gauge in step with the removal, as destroyTab does.
  refreshActiveTabsGauge();
  tabsRecycledTotal.inc();
  log('info', 'tab recycled (limit reached)', { reqId, recycledTabId: tabId, recycledFromGroup: groupKey });
  return { recycledTabId: tabId, recycledFromGroup: groupKey };
}
896
+
659
897
  function destroySession(userId) {
660
898
  const key = normalizeUserId(userId);
661
899
  const session = sessions.get(key);
@@ -684,9 +922,57 @@ function createTabState(page) {
684
922
  toolCalls: 0,
685
923
  consecutiveTimeouts: 0,
686
924
  lastSnapshot: null,
925
+ lastRequestedUrl: null,
926
+ googleRetryCount: 0,
687
927
  };
688
928
  }
689
929
 
930
/**
 * Detect a connection-failure page by scanning the first 600 characters of the
 * page's body text for known error phrases.
 *
 * @param {object|null} page - Page exposing `isClosed()` and `evaluate()`.
 * @returns {Promise<boolean>} False for a missing/closed page or when the text
 *   cannot be read; true when a known failure phrase is present.
 */
async function isGoogleUnavailable(page) {
  const usable = page && !page.isClosed();
  if (!usable) return false;

  const readBodyText = () => document.body?.innerText?.slice(0, 600) || '';
  const snippet = await page.evaluate(readBodyText).catch(() => '');
  const failurePattern = /Unable to connect|502 Bad Gateway or Proxy Error|Camoufox can’t establish a connection/;
  return failurePattern.test(snippet);
}
935
+
936
/**
 * Replay a tab's last Google search on a brand-new browser context.
 *
 * Applies only when the previous tab state's last requested URL is a Google
 * search URL and fewer than three retries have been made. The user's current
 * session (if any) has its context closed and is removed, a new session is
 * obtained via getSession, and a new page is registered under the same tab id
 * before navigating to the Google home page and then to the original URL.
 *
 * @param {string} userId - Owner of the session.
 * @param {string} sessionKey - Key of the tab group that receives the new tab.
 * @param {string} tabId - Tab id reused for the replacement tab.
 * @param {object|null} previousTabState - State of the tab being replaced.
 * @param {string} reason - Label recorded on the rotation counter.
 * @param {string} reqId - Request id, used for logging only.
 * @returns {Promise<{session: object, tabState: object}|null>} The new session
 *   and tab state, or null when rotation does not apply.
 */
async function rotateGoogleTab(userId, sessionKey, tabId, previousTabState, reason, reqId) {
  const lastUrl = previousTabState?.lastRequestedUrl;
  if (!lastUrl) return null;
  if (!isGoogleSearchUrl(previousTabState.lastRequestedUrl)) return null;
  if ((previousTabState.googleRetryCount || 0) >= 3) return null;

  browserRestartsTotal.labels(reason).inc(); // track rotation events (not a full restart)

  // Context-level rotation: drop only this user's context so the replacement
  // session is created fresh, leaving other users' sessions untouched.
  const normalizedUser = normalizeUserId(userId);
  const staleSession = sessions.get(normalizedUser);
  if (staleSession) {
    await staleSession.context.close().catch(() => {});
    sessions.delete(normalizedUser);
  }

  const session = await getSession(userId);
  const targetGroup = getTabGroup(session, sessionKey);
  const page = await session.context.newPage();

  const tabState = createTabState(page);
  tabState.googleRetryCount = (previousTabState.googleRetryCount || 0) + 1;
  tabState.lastRequestedUrl = previousTabState.lastRequestedUrl;
  attachDownloadListener(tabState, tabId, log);
  targetGroup.set(tabId, tabState);
  refreshActiveTabsGauge();

  log('warn', 'replaying google search on fresh context (per-context proxy rotation)', {
    reqId,
    tabId,
    retryCount: tabState.googleRetryCount,
    url: tabState.lastRequestedUrl,
    proxySession: session.proxySessionId || null,
  });

  // Visit the home page first, pause briefly, then replay the search itself.
  const homeUrl = 'https://www.google.com/';
  await withPageLoadDuration('navigate', () => page.goto(homeUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
  tabState.visitedUrls.add('https://www.google.com/');
  await page.waitForTimeout(1200);
  await withPageLoadDuration('navigate', () => page.goto(tabState.lastRequestedUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
  tabState.visitedUrls.add(tabState.lastRequestedUrl);

  return { session, tabState };
}
975
+
690
976
  function refreshActiveTabsGauge() {
691
977
  activeTabsGauge.set(getTotalTabCount());
692
978
  }
@@ -711,7 +997,14 @@ async function withPageLoadDuration(action, fn) {
711
997
 
712
998
 
713
999
  async function waitForPageReady(page, options = {}) {
714
- const { timeout = 10000, waitForNetwork = true } = options;
1000
+ const {
1001
+ timeout = 10000,
1002
+ waitForNetwork = true,
1003
+ waitForHydration = true,
1004
+ settleMs = 200,
1005
+ hydrationPollMs = 250,
1006
+ hydrationTimeoutMs = Math.min(timeout, 10000),
1007
+ } = options;
715
1008
 
716
1009
  try {
717
1010
  await page.waitForLoadState('domcontentloaded', { timeout });
@@ -722,27 +1015,28 @@ async function waitForPageReady(page, options = {}) {
722
1015
  });
723
1016
  }
724
1017
 
725
- // Framework hydration wait (React/Next.js/Vue) - mirrors Swift WebView.swift logic
726
- // Wait for readyState === 'complete' + network quiet (40 iterations × 250ms max)
727
- await page.evaluate(async () => {
728
- for (let i = 0; i < 40; i++) {
729
- // Check if network is quiet (no recent resource loads)
730
- const entries = performance.getEntriesByType('resource');
731
- const recentEntries = entries.slice(-5);
732
- const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
733
-
734
- if (document.readyState === 'complete' && netQuiet) {
735
- // Double RAF to ensure paint is complete
736
- await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
737
- break;
1018
+ if (waitForHydration) {
1019
+ const maxIterations = Math.max(1, Math.floor(hydrationTimeoutMs / hydrationPollMs));
1020
+ await page.evaluate(async ({ maxIterations, hydrationPollMs }) => {
1021
+ for (let i = 0; i < maxIterations; i++) {
1022
+ const entries = performance.getEntriesByType('resource');
1023
+ const recentEntries = entries.slice(-5);
1024
+ const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
1025
+
1026
+ if (document.readyState === 'complete' && netQuiet) {
1027
+ await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
1028
+ break;
1029
+ }
1030
+ await new Promise(r => setTimeout(r, hydrationPollMs));
738
1031
  }
739
- await new Promise(r => setTimeout(r, 250));
740
- }
741
- }).catch(() => {
742
- log('warn', 'hydration wait failed, continuing');
743
- });
1032
+ }, { maxIterations, hydrationPollMs }).catch(() => {
1033
+ log('warn', 'hydration wait failed, continuing');
1034
+ });
1035
+ }
744
1036
 
745
- await page.waitForTimeout(200);
1037
+ if (settleMs > 0) {
1038
+ await page.waitForTimeout(settleMs);
1039
+ }
746
1040
 
747
1041
  // Auto-dismiss common consent/privacy dialogs
748
1042
  await dismissConsentDialogs(page);
@@ -809,6 +1103,25 @@ function isGoogleSerp(url) {
809
1103
  }
810
1104
  }
811
1105
 
1106
/**
 * Tell whether a URL points at a Google web-search results path.
 *
 * The host must be `google.<tld>` (optionally with subdomains and a two-letter
 * country suffix, e.g. `www.google.com`, `google.co.uk`, `google.com.au`) and
 * the path must be exactly `/search`. A plain substring test on the hostname
 * would also accept look-alike hosts such as `notgoogle.com` or
 * `www.google.com.attacker.net`, so the host is matched with an anchored pattern.
 *
 * @param {string} url - Absolute URL to test.
 * @returns {boolean} True for Google search URLs; false otherwise, including
 *   for input that cannot be parsed as a URL.
 */
function isGoogleSearchUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  // Tolerate a fully-qualified hostname with a trailing dot.
  const host = parsed.hostname.replace(/\.$/, '');
  const isGoogleHost = /(?:^|\.)google\.[a-z]{2,3}(?:\.[a-z]{2})?$/.test(host);
  return isGoogleHost && parsed.pathname === '/search';
}
1114
+
1115
/**
 * Detect Google's "unusual traffic" interstitial.
 *
 * A page counts as blocked when its URL contains `google.com/sorry/`, or when
 * the first 600 characters of its body text contain one of the known block
 * phrases.
 *
 * @param {object|null} page - Page exposing `isClosed()`, `url()` and `evaluate()`.
 * @returns {Promise<boolean>} False for a missing/closed page or when the body
 *   text cannot be read and the URL is not a sorry page.
 */
async function isGoogleSearchBlocked(page) {
  const usable = page && !page.isClosed();
  if (!usable) return false;

  const currentUrl = page.url();
  if (currentUrl.includes('google.com/sorry/')) {
    return true;
  }

  const readBodyText = () => document.body?.innerText?.slice(0, 600) || '';
  const snippet = await page.evaluate(readBodyText).catch(() => '');
  const blockPattern = /Our systems have detected unusual traffic|About this page|If you're having trouble accessing Google Search|SG_REL/;
  return blockPattern.test(snippet);
}
1124
+
812
1125
  // --- Google SERP: combined extraction (refs + snapshot in one DOM pass) ---
813
1126
  // Returns { refs: Map, snapshot: string }
814
1127
  async function extractGoogleSerp(page) {
@@ -949,6 +1262,8 @@ async function extractGoogleSerp(page) {
949
1262
  return { refs, snapshot: extracted.snapshot };
950
1263
  }
951
1264
 
1265
+ const REFRESH_READY_TIMEOUT_MS = 2500;
1266
+
952
1267
  async function buildRefs(page) {
953
1268
  const refs = new Map();
954
1269
 
@@ -967,16 +1282,20 @@ async function buildRefs(page) {
967
1282
  const start = Date.now();
968
1283
 
969
1284
  // Hard total timeout on the entire buildRefs operation
970
- const timeoutPromise = new Promise((_, reject) =>
971
- setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS)
972
- );
1285
+ let timerId;
1286
+ const timeoutPromise = new Promise((_, reject) => {
1287
+ timerId = setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS);
1288
+ });
973
1289
 
974
1290
  try {
975
- return await Promise.race([
1291
+ const result = await Promise.race([
976
1292
  _buildRefsInner(page, refs, start),
977
1293
  timeoutPromise
978
1294
  ]);
1295
+ clearTimeout(timerId);
1296
+ return result;
979
1297
  } catch (err) {
1298
+ clearTimeout(timerId);
980
1299
  if (err.message === 'buildRefs_timeout') {
981
1300
  log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
982
1301
  return refs;
@@ -986,7 +1305,12 @@ async function buildRefs(page) {
986
1305
  }
987
1306
 
988
1307
  async function _buildRefsInner(page, refs, start) {
989
- await waitForPageReady(page, { waitForNetwork: false });
1308
+ await waitForPageReady(page, {
1309
+ timeout: REFRESH_READY_TIMEOUT_MS,
1310
+ waitForNetwork: false,
1311
+ waitForHydration: false,
1312
+ settleMs: 100,
1313
+ });
990
1314
 
991
1315
  // Budget remaining time for ariaSnapshot
992
1316
  const elapsed = Date.now() - start;
@@ -1055,7 +1379,12 @@ async function getAriaSnapshot(page) {
1055
1379
  if (!page || page.isClosed()) {
1056
1380
  return null;
1057
1381
  }
1058
- await waitForPageReady(page, { waitForNetwork: false });
1382
+ await waitForPageReady(page, {
1383
+ timeout: REFRESH_READY_TIMEOUT_MS,
1384
+ waitForNetwork: false,
1385
+ waitForHydration: false,
1386
+ settleMs: 100,
1387
+ });
1059
1388
  try {
1060
1389
  return await page.locator('body').ariaSnapshot({ timeout: 5000 });
1061
1390
  } catch (err) {
@@ -1078,11 +1407,46 @@ function refToLocator(page, ref, refs) {
1078
1407
  return locator;
1079
1408
  }
1080
1409
 
1410
/**
 * Rebuild the element refs for a tab, optionally bounded by a timeout.
 *
 * @param {object} tabState - Tab state; `page` is passed to buildRefs and
 *   `refs` (when it is a Map) is treated as the previous ref set.
 * @param {object} [options]
 * @param {string} [options.reason='refresh'] - Label used in the timeout error
 *   message (`<reason>_refs_timeout`) and in logs.
 * @param {number|null} [options.timeoutMs=null] - Upper bound for the rebuild;
 *   a falsy value disables the bound.
 * @param {boolean} [options.preserveExistingOnEmpty=true] - When the rebuild
 *   yields no refs, the page URL is unchanged and previous refs exist, return
 *   the previous refs instead of the empty result.
 * @returns {Promise<Map>} The refreshed refs, or the previous refs when preserved.
 * @throws {Error} `<reason>_refs_timeout` when the bound elapses first, or
 *   whatever buildRefs rejects with.
 */
async function refreshTabRefs(tabState, options = {}) {
  const {
    reason = 'refresh',
    timeoutMs = null,
    preserveExistingOnEmpty = true,
  } = options;

  const beforeUrl = tabState.page?.url?.() || '';
  const existingRefs = tabState.refs instanceof Map ? tabState.refs : new Map();
  const refreshPromise = buildRefs(tabState.page);

  let refreshedRefs;
  if (timeoutMs) {
    const timeoutLabel = `${reason}_refs_timeout`;
    let timerId;
    const timeoutPromise = new Promise((_, reject) => {
      timerId = setTimeout(() => reject(new Error(timeoutLabel)), timeoutMs);
    });
    try {
      refreshedRefs = await Promise.race([refreshPromise, timeoutPromise]);
    } finally {
      // Always cancel the timer so it does not linger after the race settles.
      clearTimeout(timerId);
    }
  } else {
    refreshedRefs = await refreshPromise;
  }

  const afterUrl = tabState.page?.url?.() || beforeUrl;
  if (preserveExistingOnEmpty && refreshedRefs.size === 0 && existingRefs.size > 0 && beforeUrl === afterUrl) {
    log('warn', 'preserving previous refs after empty rebuild', {
      reason,
      url: afterUrl,
      previousRefs: existingRefs.size,
    });
    return existingRefs;
  }

  return refreshedRefs;
}
1444
+
1081
1445
  // --- YouTube transcript ---
1082
1446
  // Implementation extracted to lib/youtube.js to avoid scanner false positives
1083
1447
  // (child_process + app.post in same file triggers OpenClaw skill-scanner)
1084
1448
 
1085
- detectYtDlp(log);
1449
+ await detectYtDlp(log);
1086
1450
 
1087
1451
  app.post('/youtube/transcript', async (req, res) => {
1088
1452
  const reqId = req.reqId;
@@ -1102,14 +1466,23 @@ app.post('/youtube/transcript', async (req, res) => {
1102
1466
  const videoId = videoIdMatch[1];
1103
1467
  const lang = languages[0] || 'en';
1104
1468
 
1105
- log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser' });
1469
+ // Re-detect yt-dlp if startup detection failed (transient issue)
1470
+ await ensureYtDlp(log);
1471
+
1472
+ const ytDlpProxyUrl = buildProxyUrl(proxyPool, CONFIG.proxy);
1473
+ log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser', hasProxy: !!ytDlpProxyUrl });
1106
1474
 
1107
1475
  let result;
1108
1476
  if (hasYtDlp()) {
1109
1477
  try {
1110
- result = await ytDlpTranscript(reqId, url, videoId, lang);
1478
+ result = await ytDlpTranscript(reqId, url, videoId, lang, ytDlpProxyUrl);
1111
1479
  } catch (ytErr) {
1112
- log('warn', 'yt-dlp failed, falling back to browser', { reqId, error: ytErr.message });
1480
+ log('warn', 'yt-dlp threw, falling back to browser', { reqId, error: ytErr.message });
1481
+ result = null;
1482
+ }
1483
+ // If yt-dlp returned an error result (e.g. no captions) or threw, try browser
1484
+ if (!result || result.status !== 'ok') {
1485
+ if (result) log('warn', 'yt-dlp returned error, falling back to browser', { reqId, status: result.status, code: result.code });
1113
1486
  result = await browserTranscript(reqId, url, videoId, lang);
1114
1487
  }
1115
1488
  } else {
@@ -1119,6 +1492,7 @@ app.post('/youtube/transcript', async (req, res) => {
1119
1492
  log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
1120
1493
  res.json(result);
1121
1494
  } catch (err) {
1495
+ failuresTotal.labels(classifyError(err), 'youtube_transcript').inc();
1122
1496
  log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
1123
1497
  res.status(500).json({ error: safeError(err) });
1124
1498
  }
@@ -1237,6 +1611,16 @@ async function browserTranscript(reqId, url, videoId, lang) {
1237
1611
  };
1238
1612
  } finally {
1239
1613
  await safePageClose(page);
1614
+ // Clean up phantom transcript session if no tabs remain
1615
+ const ytSession = sessions.get(normalizeUserId('__yt_transcript__'));
1616
+ if (ytSession) {
1617
+ let totalTabs = 0;
1618
+ for (const g of ytSession.tabGroups.values()) totalTabs += g.size;
1619
+ if (totalTabs === 0) {
1620
+ ytSession.context.close().catch(() => {});
1621
+ sessions.delete(normalizeUserId('__yt_transcript__'));
1622
+ }
1623
+ }
1240
1624
  }
1241
1625
  });
1242
1626
  }
@@ -1246,13 +1630,26 @@ app.get('/health', (req, res) => {
1246
1630
  return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
1247
1631
  }
1248
1632
  const running = browser !== null && (browser.isConnected?.() ?? false);
1633
+ if (proxyPool?.canRotateSessions && !running) {
1634
+ scheduleBrowserWarmRetry();
1635
+ return res.status(503).json({
1636
+ ok: false,
1637
+ engine: 'camoufox',
1638
+ browserConnected: false,
1639
+ browserRunning: false,
1640
+ warming: true,
1641
+ ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1642
+ });
1643
+ }
1249
1644
  res.json({
1250
1645
  ok: true,
1251
1646
  engine: 'camoufox',
1252
1647
  browserConnected: running,
1253
1648
  browserRunning: running,
1254
1649
  activeTabs: getTotalTabCount(),
1650
+ activeSessions: sessions.size,
1255
1651
  consecutiveFailures: healthState.consecutiveNavFailures,
1652
+ ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1256
1653
  });
1257
1654
  });
1258
1655
 
@@ -1276,18 +1673,19 @@ app.post('/tabs', async (req, res) => {
1276
1673
 
1277
1674
  let totalTabs = 0;
1278
1675
  for (const group of session.tabGroups.values()) totalTabs += group.size;
1279
- if (totalTabs >= MAX_TABS_PER_SESSION) {
1280
- throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1281
- }
1282
1676
 
1283
- if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
1284
- throw Object.assign(new Error('Maximum global tabs reached'), { statusCode: 429 });
1677
+ // Recycle oldest tab when limits are reached instead of rejecting
1678
+ if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
1679
+ const recycled = await recycleOldestTab(session, req.reqId);
1680
+ if (!recycled) {
1681
+ throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1682
+ }
1285
1683
  }
1286
1684
 
1287
1685
  const group = getTabGroup(session, resolvedSessionKey);
1288
1686
 
1289
1687
  const page = await session.context.newPage();
1290
- const tabId = crypto.randomUUID();
1688
+ const tabId = fly.makeTabId();
1291
1689
  const tabState = createTabState(page);
1292
1690
  attachDownloadListener(tabState, tabId);
1293
1691
  group.set(tabId, tabState);
@@ -1296,13 +1694,14 @@ app.post('/tabs', async (req, res) => {
1296
1694
  if (url) {
1297
1695
  const urlErr = validateUrl(url);
1298
1696
  if (urlErr) throw Object.assign(new Error(urlErr), { statusCode: 400 });
1697
+ tabState.lastRequestedUrl = url;
1299
1698
  await withPageLoadDuration('open_url', () => page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1300
1699
  tabState.visitedUrls.add(url);
1301
1700
  }
1302
1701
 
1303
1702
  log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
1304
1703
  return { tabId, url: page.url() };
1305
- })(), HANDLER_TIMEOUT_MS, 'tab create');
1704
+ })(), requestTimeoutMs(), 'tab create');
1306
1705
 
1307
1706
  res.json(result);
1308
1707
  } catch (err) {
@@ -1321,40 +1720,23 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1321
1720
 
1322
1721
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1323
1722
  await ensureBrowser();
1723
+ const resolvedSessionKey = sessionKey || listItemId || 'default';
1324
1724
  let session = sessions.get(normalizeUserId(userId));
1325
1725
  let found = session && findTab(session, tabId);
1326
1726
 
1327
1727
  let tabState;
1328
1728
  if (!found) {
1329
- const resolvedSessionKey = sessionKey || listItemId || 'default';
1330
1729
  session = await getSession(userId);
1331
1730
  let sessionTabs = 0;
1332
1731
  for (const g of session.tabGroups.values()) sessionTabs += g.size;
1333
1732
  if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
1334
- // Reuse oldest tab in session instead of rejecting
1335
- let oldestTab = null;
1336
- let oldestGroup = null;
1337
- let oldestTabId = null;
1338
- for (const [gKey, group] of session.tabGroups) {
1339
- for (const [tid, ts] of group) {
1340
- if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
1341
- oldestTab = ts;
1342
- oldestGroup = group;
1343
- oldestTabId = tid;
1344
- }
1345
- }
1346
- }
1347
- if (oldestTab) {
1348
- tabState = oldestTab;
1349
- const group = getTabGroup(session, resolvedSessionKey);
1350
- if (oldestGroup) oldestGroup.delete(oldestTabId);
1351
- group.set(tabId, tabState);
1352
- { const _l = tabLocks.get(oldestTabId); if (_l) _l.drain(); tabLocks.delete(oldestTabId); }
1353
- log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
1354
- } else {
1733
+ // Recycle oldest tab to free a slot, then create new page
1734
+ const recycled = await recycleOldestTab(session, req.reqId);
1735
+ if (!recycled) {
1355
1736
  throw new Error('Maximum tabs per session reached');
1356
1737
  }
1357
- } else {
1738
+ }
1739
+ {
1358
1740
  const page = await session.context.newPage();
1359
1741
  tabState = createTabState(page);
1360
1742
  attachDownloadListener(tabState, tabId, log);
@@ -1379,9 +1761,61 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1379
1761
  if (urlErr) throw new Error(urlErr);
1380
1762
 
1381
1763
  return await withTabLock(tabId, async () => {
1382
- await withPageLoadDuration('navigate', () => tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1383
- tabState.visitedUrls.add(targetUrl);
1384
- tabState.lastSnapshot = null;
1764
+ const currentSessionKey = found?.listItemId || resolvedSessionKey;
1765
+ const isGoogleSearch = isGoogleSearchUrl(targetUrl);
1766
+
1767
+ const navigateCurrentPage = async () => {
1768
+ tabState.lastRequestedUrl = targetUrl;
1769
+ await withPageLoadDuration('navigate', () => tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1770
+ tabState.visitedUrls.add(targetUrl);
1771
+ tabState.lastSnapshot = null;
1772
+ };
1773
+
1774
+ const prewarmGoogleHome = async () => {
1775
+ if (!isGoogleSearch || tabState.visitedUrls.has('https://www.google.com/')) return;
1776
+ await withPageLoadDuration('navigate', () => tabState.page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }));
1777
+ tabState.visitedUrls.add('https://www.google.com/');
1778
+ await tabState.page.waitForTimeout(1200);
1779
+ };
1780
+
1781
+ const recreateTabOnFreshContext = async () => {
1782
+ const previousRetryCount = tabState.googleRetryCount || 0;
1783
+ browserRestartsTotal.labels('google_search_block').inc();
1784
+ // Rotate at context level — destroy this user's session and create
1785
+ // a fresh one with a new proxy session. Does NOT restart the browser.
1786
+ const key = normalizeUserId(userId);
1787
+ const oldSession = sessions.get(key);
1788
+ if (oldSession) {
1789
+ await oldSession.context.close().catch(() => {});
1790
+ sessions.delete(key);
1791
+ }
1792
+ session = await getSession(userId);
1793
+ const group = getTabGroup(session, currentSessionKey);
1794
+ const page = await session.context.newPage();
1795
+ tabState = createTabState(page);
1796
+ tabState.googleRetryCount = previousRetryCount + 1;
1797
+ attachDownloadListener(tabState, tabId, log);
1798
+ group.set(tabId, tabState);
1799
+ refreshActiveTabsGauge();
1800
+ };
1801
+
1802
+ if (isGoogleSearch && proxyPool?.canRotateSessions) {
1803
+ await prewarmGoogleHome();
1804
+ }
1805
+
1806
+ await navigateCurrentPage();
1807
+
1808
+ if (isGoogleSearch && proxyPool?.canRotateSessions && await isGoogleSearchBlocked(tabState.page)) {
1809
+ log('warn', 'google search blocked, rotating browser proxy session', {
1810
+ reqId: req.reqId,
1811
+ tabId,
1812
+ url: tabState.page.url(),
1813
+ proxySession: browserLaunchProxy?.sessionId || null,
1814
+ });
1815
+ await recreateTabOnFreshContext();
1816
+ await prewarmGoogleHome();
1817
+ await navigateCurrentPage();
1818
+ }
1385
1819
 
1386
1820
  // For Google SERP: skip eager ref building during navigate.
1387
1821
  // Results render asynchronously after DOMContentLoaded — the snapshot
@@ -1390,11 +1824,15 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1390
1824
  tabState.refs = new Map();
1391
1825
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: false, googleSerp: true };
1392
1826
  }
1827
+
1828
+ if (isGoogleSearch && await isGoogleSearchBlocked(tabState.page)) {
1829
+ return { ok: false, tabId, url: tabState.page.url(), refsAvailable: false, googleBlocked: true };
1830
+ }
1393
1831
 
1394
1832
  tabState.refs = await buildRefs(tabState.page);
1395
1833
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
1396
- });
1397
- })(), HANDLER_TIMEOUT_MS, 'navigate'));
1834
+ }, requestTimeoutMs());
1835
+ })(), requestTimeoutMs(), 'navigate'));
1398
1836
 
1399
1837
  log('info', 'navigated', { reqId: req.reqId, tabId, url: result.url });
1400
1838
  res.json(result);
@@ -1435,6 +1873,25 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1435
1873
  }
1436
1874
 
1437
1875
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1876
+ if (proxyPool?.canRotateSessions && isGoogleSearchUrl(tabState.lastRequestedUrl || '')) {
1877
+ const blocked = await isGoogleSearchBlocked(tabState.page);
1878
+ const unavailable = !blocked && await isGoogleUnavailable(tabState.page);
1879
+ if (blocked || unavailable) {
1880
+ const rotated = await rotateGoogleTab(userId, found.listItemId, req.params.tabId, tabState, blocked ? 'google_search_block_snapshot' : 'google_search_unavailable_snapshot', req.reqId);
1881
+ if (rotated) {
1882
+ tabState.page = rotated.tabState.page;
1883
+ tabState.refs = rotated.tabState.refs;
1884
+ tabState.visitedUrls = rotated.tabState.visitedUrls;
1885
+ tabState.downloads = rotated.tabState.downloads;
1886
+ tabState.toolCalls = rotated.tabState.toolCalls;
1887
+ tabState.consecutiveTimeouts = rotated.tabState.consecutiveTimeouts;
1888
+ tabState.lastSnapshot = rotated.tabState.lastSnapshot;
1889
+ tabState.lastRequestedUrl = rotated.tabState.lastRequestedUrl;
1890
+ tabState.googleRetryCount = rotated.tabState.googleRetryCount;
1891
+ }
1892
+ }
1893
+ }
1894
+
1438
1895
  const pageUrl = tabState.page.url();
1439
1896
 
1440
1897
  // Google SERP fast path — DOM extraction instead of ariaSnapshot
@@ -1460,7 +1917,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1460
1917
  return response;
1461
1918
  }
1462
1919
 
1463
- tabState.refs = await buildRefs(tabState.page);
1920
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'snapshot' });
1464
1921
  const ariaYaml = await getAriaSnapshot(tabState.page);
1465
1922
 
1466
1923
  let annotatedYaml = ariaYaml || '';
@@ -1516,7 +1973,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1516
1973
  }
1517
1974
 
1518
1975
  return response;
1519
- })(), HANDLER_TIMEOUT_MS, 'snapshot'));
1976
+ })(), requestTimeoutMs(), 'snapshot'));
1520
1977
 
1521
1978
  log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
1522
1979
  res.json(result);
@@ -1634,9 +2091,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1634
2091
  log('info', 'auto-refreshing refs before click', { ref, hadRefs: tabState.refs.size });
1635
2092
  try {
1636
2093
  const preClickBudget = Math.min(4000, remainingBudget());
1637
- const refreshPromise = buildRefs(tabState.page);
1638
- const refreshBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('pre_click_refs_timeout')), preClickBudget));
1639
- tabState.refs = await Promise.race([refreshPromise, refreshBudget]);
2094
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'pre_click', timeoutMs: preClickBudget });
1640
2095
  } catch (e) {
1641
2096
  if (e.message === 'pre_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1642
2097
  log('warn', 'pre-click buildRefs timed out, proceeding without refresh');
@@ -1676,9 +2131,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1676
2131
  // If it times out, return without refs (caller's next /snapshot will rebuild them).
1677
2132
  const postClickBudget = Math.max(2000, remainingBudget());
1678
2133
  try {
1679
- const refsPromise = buildRefs(tabState.page);
1680
- const refsBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('post_click_refs_timeout')), postClickBudget));
1681
- tabState.refs = await Promise.race([refsPromise, refsBudget]);
2134
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'post_click', timeoutMs: postClickBudget });
1682
2135
  } catch (e) {
1683
2136
  if (e.message === 'post_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1684
2137
  log('warn', 'post-click buildRefs timed out, returning without refs', { budget: postClickBudget, elapsed: Date.now() - clickStart });
@@ -1702,7 +2155,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1702
2155
  const session = sessions.get(normalizeUserId(req.body.userId));
1703
2156
  const found = session && findTab(session, tabId);
1704
2157
  if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1705
- found.tabState.refs = await buildRefs(found.tabState.page);
2158
+ found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'click_timeout' });
1706
2159
  found.tabState.lastSnapshot = null;
1707
2160
  return res.status(500).json({
1708
2161
  error: safeError(err),
@@ -1741,7 +2194,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1741
2194
  let locator = refToLocator(tabState.page, ref, tabState.refs);
1742
2195
  if (!locator) {
1743
2196
  log('info', 'auto-refreshing refs before fill', { ref, hadRefs: tabState.refs.size });
1744
- tabState.refs = await buildRefs(tabState.page);
2197
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'type' });
1745
2198
  locator = refToLocator(tabState.page, ref, tabState.refs);
1746
2199
  }
1747
2200
  if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
@@ -1759,7 +2212,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1759
2212
  const session = sessions.get(normalizeUserId(req.body.userId));
1760
2213
  const found = session && findTab(session, tabId);
1761
2214
  if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1762
- found.tabState.refs = await buildRefs(found.tabState.page);
2215
+ found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'type_timeout' });
1763
2216
  found.tabState.lastSnapshot = null;
1764
2217
  return res.status(500).json({
1765
2218
  error: safeError(err),
@@ -1811,8 +2264,9 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
1811
2264
  const { tabState } = found;
1812
2265
  tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1813
2266
 
1814
- const delta = direction === 'up' ? -amount : amount;
1815
- await tabState.page.mouse.wheel(0, delta);
2267
+ const isVertical = direction === 'up' || direction === 'down';
2268
+ const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
2269
+ await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
1816
2270
  await tabState.page.waitForTimeout(300);
1817
2271
 
1818
2272
  res.json({ ok: true });
@@ -1974,6 +2428,7 @@ app.get('/tabs/:tabId/downloads', async (req, res) => {
1974
2428
 
1975
2429
  res.json({ tabId: req.params.tabId, downloads });
1976
2430
  } catch (err) {
2431
+ failuresTotal.labels(classifyError(err), 'downloads').inc();
1977
2432
  log('error', 'downloads failed', { reqId: req.reqId, error: err.message });
1978
2433
  res.status(500).json({ error: safeError(err) });
1979
2434
  }
@@ -1999,6 +2454,7 @@ app.get('/tabs/:tabId/images', async (req, res) => {
1999
2454
 
2000
2455
  res.json({ tabId: req.params.tabId, images });
2001
2456
  } catch (err) {
2457
+ failuresTotal.labels(classifyError(err), 'images').inc();
2002
2458
  log('error', 'images failed', { reqId: req.reqId, error: err.message });
2003
2459
  res.status(500).json({ error: safeError(err) });
2004
2460
  }
@@ -2067,6 +2523,7 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
2067
2523
  log('info', 'evaluate', { reqId: req.reqId, tabId: req.params.tabId, userId, resultType: typeof result });
2068
2524
  res.json({ ok: true, result });
2069
2525
  } catch (err) {
2526
+ failuresTotal.labels(classifyError(err), 'evaluate').inc();
2070
2527
  log('error', 'evaluate failed', { reqId: req.reqId, error: err.message });
2071
2528
  res.status(500).json({ error: safeError(err) });
2072
2529
  }
@@ -2075,7 +2532,8 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
2075
2532
  // Close tab
2076
2533
  app.delete('/tabs/:tabId', async (req, res) => {
2077
2534
  try {
2078
- const { userId } = req.body;
2535
+ const userId = req.query.userId || req.body?.userId;
2536
+ if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
2079
2537
  const session = sessions.get(normalizeUserId(userId));
2080
2538
  const found = session && findTab(session, req.params.tabId);
2081
2539
  if (found) {
@@ -2099,7 +2557,8 @@ app.delete('/tabs/:tabId', async (req, res) => {
2099
2557
  // Close tab group
2100
2558
  app.delete('/tabs/group/:listItemId', async (req, res) => {
2101
2559
  try {
2102
- const { userId } = req.body;
2560
+ const userId = req.query.userId || req.body?.userId;
2561
+ if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
2103
2562
  const session = sessions.get(normalizeUserId(userId));
2104
2563
  const group = session?.tabGroups.get(req.params.listItemId);
2105
2564
  if (group) {
@@ -2160,6 +2619,7 @@ setInterval(() => {
2160
2619
  const now = Date.now();
2161
2620
  for (const [userId, session] of sessions) {
2162
2621
  if (now - session.lastAccess > SESSION_TIMEOUT_MS) {
2622
+ sessionsExpiredTotal.inc();
2163
2623
  clearSessionDownloads(session).catch(() => {});
2164
2624
  session.context.close().catch(() => {});
2165
2625
  sessions.delete(userId);
@@ -2188,6 +2648,7 @@ setInterval(() => {
2188
2648
  if (tabState.toolCalls === tabState._lastReaperToolCalls) {
2189
2649
  const idleMs = now - tabState._lastReaperCheck;
2190
2650
  if (idleMs >= TAB_INACTIVITY_MS) {
2651
+ tabsReapedTotal.inc();
2191
2652
  log('info', 'tab reaped (inactive)', { userId, tabId, listItemId, idleMs, toolCalls: tabState.toolCalls });
2192
2653
  safePageClose(tabState.page);
2193
2654
  group.delete(tabId);
@@ -2271,21 +2732,20 @@ app.post('/tabs/open', async (req, res) => {
2271
2732
 
2272
2733
  const session = await getSession(userId);
2273
2734
 
2274
- // Check global tab limit first
2275
- if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
2276
- return res.status(429).json({ error: 'Maximum global tabs reached' });
2277
- }
2278
-
2735
+ // Recycle oldest tab when limits are reached instead of rejecting
2279
2736
  let totalTabs = 0;
2280
2737
  for (const g of session.tabGroups.values()) totalTabs += g.size;
2281
- if (totalTabs >= MAX_TABS_PER_SESSION) {
2282
- return res.status(429).json({ error: 'Maximum tabs per session reached' });
2738
+ if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
2739
+ const recycled = await recycleOldestTab(session, req.reqId);
2740
+ if (!recycled) {
2741
+ return res.status(429).json({ error: 'Maximum tabs per session reached' });
2742
+ }
2283
2743
  }
2284
2744
 
2285
2745
  const group = getTabGroup(session, listItemId);
2286
2746
 
2287
2747
  const page = await session.context.newPage();
2288
- const tabId = crypto.randomUUID();
2748
+ const tabId = fly.makeTabId();
2289
2749
  const tabState = createTabState(page);
2290
2750
  attachDownloadListener(tabState, tabId, log);
2291
2751
  group.set(tabId, tabState);
@@ -2314,6 +2774,7 @@ app.post('/start', async (req, res) => {
2314
2774
  await ensureBrowser();
2315
2775
  res.json({ ok: true, profile: 'camoufox' });
2316
2776
  } catch (err) {
2777
+ failuresTotal.labels('browser_launch', 'start').inc();
2317
2778
  res.status(500).json({ ok: false, error: safeError(err) });
2318
2779
  }
2319
2780
  });
@@ -2616,8 +3077,9 @@ app.post('/act', async (req, res) => {
2616
3077
  if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
2617
3078
  await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
2618
3079
  } else {
2619
- const delta = direction === 'up' ? -amount : amount;
2620
- await tabState.page.mouse.wheel(0, delta);
3080
+ const isVertical = direction === 'up' || direction === 'down';
3081
+ const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
3082
+ await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
2621
3083
  }
2622
3084
  await tabState.page.waitForTimeout(300);
2623
3085
  return { ok: true, targetId };
@@ -2717,6 +3179,7 @@ setInterval(async () => {
2717
3179
  await testContext.close();
2718
3180
  healthState.lastSuccessfulNav = Date.now();
2719
3181
  } catch (err) {
3182
+ failuresTotal.labels('health_probe', 'internal').inc();
2720
3183
  log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
2721
3184
  if (testContext) await testContext.close().catch(() => {});
2722
3185
  restartBrowser('health probe failed').catch(() => {});
@@ -2759,12 +3222,21 @@ async function gracefulShutdown(signal) {
2759
3222
  process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
2760
3223
  process.on('SIGINT', () => gracefulShutdown('SIGINT'));
2761
3224
 
3225
+ // Idle self-shutdown REMOVED — it was racing with min_machines_running=2
3226
+ // and stopping machines that Fly couldn't auto-restart fast enough, leaving
3227
+ // only 1 machine to handle all browser traffic (causing timeouts for users).
3228
+ // Fly's auto_stop_machines=false + min_machines_running=2 handles scaling.
3229
+
2762
3230
  const PORT = CONFIG.port;
2763
3231
  const server = app.listen(PORT, async () => {
2764
3232
  startMemoryReporter();
2765
3233
  refreshActiveTabsGauge();
2766
3234
  refreshTabLockQueueDepth();
2767
- log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
3235
+ if (FLY_MACHINE_ID) {
3236
+ log('info', 'server started (fly)', { port: PORT, pid: process.pid, machineId: FLY_MACHINE_ID, nodeVersion: process.version });
3237
+ } else {
3238
+ log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
3239
+ }
2768
3240
  // Pre-warm browser so first request doesn't eat a 6-7s cold start
2769
3241
  try {
2770
3242
  const start = Date.now();
@@ -2772,8 +3244,10 @@ const server = app.listen(PORT, async () => {
2772
3244
  log('info', 'browser pre-warmed', { ms: Date.now() - start });
2773
3245
  scheduleBrowserIdleShutdown();
2774
3246
  } catch (err) {
2775
- log('error', 'browser pre-warm failed (will retry on first request)', { error: err.message });
3247
+ log('error', 'browser pre-warm failed (will retry in background)', { error: err.message });
3248
+ scheduleBrowserWarmRetry();
2776
3249
  }
3250
+ // Idle self-shutdown removed — Fly manages machine lifecycle via fly.toml.
2777
3251
  });
2778
3252
 
2779
3253
  server.on('error', (err) => {