@askjo/camofox-browser 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -1,10 +1,13 @@
1
1
  import { Camoufox, launchOptions } from 'camoufox-js';
2
+ import { VirtualDisplay } from 'camoufox-js/dist/virtdisplay.js';
2
3
  import { firefox } from 'playwright-core';
3
4
  import express from 'express';
4
5
  import crypto from 'crypto';
5
6
  import os from 'os';
6
7
  import { expandMacro } from './lib/macros.js';
7
8
  import { loadConfig } from './lib/config.js';
9
+ import { normalizePlaywrightProxy, createProxyPool, buildProxyUrl } from './lib/proxy.js';
10
+ import { createFlyHelpers } from './lib/fly.js';
8
11
  import { windowSnapshot } from './lib/snapshot.js';
9
12
  import {
10
13
  MAX_DOWNLOAD_INLINE_BYTES,
@@ -14,7 +17,15 @@ import {
14
17
  getDownloadsList,
15
18
  extractPageImages,
16
19
  } from './lib/downloads.js';
17
- import { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
20
+ import { detectYtDlp, hasYtDlp, ensureYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
21
+ import {
22
+ register as metricsRegister,
23
+ requestsTotal, requestDuration, pageLoadDuration,
24
+ activeTabsGauge, tabLockQueueDepth,
25
+ tabLockTimeoutsTotal, startMemoryReporter, stopMemoryReporter, actionFromReq,
26
+ failuresTotal, browserRestartsTotal, tabsDestroyedTotal,
27
+ sessionsExpiredTotal, tabsReapedTotal, tabsRecycledTotal, classifyError,
28
+ } from './lib/metrics.js';
18
29
 
19
30
  const CONFIG = loadConfig();
20
31
 
@@ -37,23 +48,44 @@ function log(level, msg, fields = {}) {
37
48
  const app = express();
38
49
  app.use(express.json({ limit: '100kb' }));
39
50
 
40
- // Request logging middleware
51
+ // Request logging + metrics middleware
41
52
  app.use((req, res, next) => {
42
- if (req.path === '/health') return next();
43
53
  const reqId = crypto.randomUUID().slice(0, 8);
44
54
  req.reqId = reqId;
45
55
  req.startTime = Date.now();
56
+
46
57
  const userId = req.body?.userId || req.query?.userId || '-';
47
- log('info', 'req', { reqId, method: req.method, path: req.path, userId });
58
+ if (req.path !== '/health') {
59
+ log('info', 'req', { reqId, method: req.method, path: req.path, userId });
60
+ }
61
+
62
+ const action = actionFromReq(req);
63
+ const done = requestDuration.startTimer({ action });
64
+
48
65
  const origEnd = res.end.bind(res);
49
66
  res.end = function (...args) {
50
67
  const ms = Date.now() - req.startTime;
51
- log('info', 'res', { reqId, status: res.statusCode, ms });
68
+ const isErrorStatus = res.statusCode >= 400;
69
+ requestsTotal.labels(action, isErrorStatus ? 'error' : 'success').inc();
70
+ done();
71
+
72
+ if (req.path !== '/health') {
73
+ log('info', 'res', { reqId, status: res.statusCode, ms });
74
+ }
75
+
52
76
  return origEnd(...args);
53
77
  };
78
+
54
79
  next();
55
80
  });
56
81
 
82
+ // --- Horizontal scaling (Fly.io multi-machine) ---
83
+ const fly = createFlyHelpers(CONFIG);
84
+ const FLY_MACHINE_ID = fly.machineId;
85
+
86
+ // Route tab requests to the owning machine via fly-replay header.
87
+ app.use('/tabs/:tabId', fly.replayMiddleware(log));
88
+
57
89
  const ALLOWED_URL_SCHEMES = ['http:', 'https:'];
58
90
 
59
91
  // Interactive roles to include - exclude combobox to avoid opening complex widgets
@@ -203,6 +235,7 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
203
235
  log('info', 'cookies imported', { reqId: req.reqId, userId: String(userId), count: sanitized.length });
204
236
  res.json(result);
205
237
  } catch (err) {
238
+ failuresTotal.labels(classifyError(err), 'set_cookies').inc();
206
239
  log('error', 'cookie import failed', { reqId: req.reqId, error: err.message });
207
240
  res.status(500).json({ error: safeError(err) });
208
241
  }
@@ -229,6 +262,8 @@ const FAILURE_THRESHOLD = 3;
229
262
  const MAX_CONSECUTIVE_TIMEOUTS = 3;
230
263
  const TAB_LOCK_TIMEOUT_MS = 35000; // Must be > HANDLER_TIMEOUT_MS so active op times out first
231
264
 
265
+
266
+
232
267
  // Proper mutex for tab serialization. The old Promise-chain lock on timeout proceeded
233
268
  // WITHOUT the lock, allowing concurrent Playwright operations that corrupt CDP state.
234
269
  class TabLock {
@@ -243,9 +278,12 @@ class TabLock {
243
278
  entry.timer = setTimeout(() => {
244
279
  const idx = this.queue.indexOf(entry);
245
280
  if (idx !== -1) this.queue.splice(idx, 1);
281
+ tabLockTimeoutsTotal.inc();
282
+ refreshTabLockQueueDepth();
246
283
  reject(new Error('Tab lock queue timeout'));
247
284
  }, timeoutMs);
248
285
  this.queue.push(entry);
286
+ refreshTabLockQueueDepth();
249
287
  this._tryNext();
250
288
  });
251
289
  }
@@ -253,6 +291,7 @@ class TabLock {
253
291
  release() {
254
292
  this.active = false;
255
293
  this._tryNext();
294
+ refreshTabLockQueueDepth();
256
295
  }
257
296
 
258
297
  _tryNext() {
@@ -260,6 +299,7 @@ class TabLock {
260
299
  this.active = true;
261
300
  const entry = this.queue.shift();
262
301
  clearTimeout(entry.timer);
302
+ refreshTabLockQueueDepth();
263
303
  entry.resolve();
264
304
  }
265
305
 
@@ -270,6 +310,7 @@ class TabLock {
270
310
  entry.reject(new Error('Tab destroyed'));
271
311
  }
272
312
  this.queue = [];
313
+ refreshTabLockQueueDepth();
273
314
  }
274
315
  }
275
316
 
@@ -302,6 +343,10 @@ function withTimeout(promise, ms, label) {
302
343
  ]);
303
344
  }
304
345
 
/**
 * Choose the handler timeout for a request.
 *
 * When the proxy pool reports that it can rotate sessions, the timeout is
 * raised to at least 180000 ms; otherwise `baseMs` is returned unchanged.
 *
 * @param {number} [baseMs=HANDLER_TIMEOUT_MS] - Timeout to start from, in milliseconds.
 * @returns {number} Timeout to apply, in milliseconds.
 */
function requestTimeoutMs(baseMs = HANDLER_TIMEOUT_MS) {
  if (proxyPool?.canRotateSessions) {
    return Math.max(baseMs, 180000);
  }
  return baseMs;
}
349
+
305
350
  const userConcurrency = new Map();
306
351
 
307
352
  async function withUserLimit(userId, operation) {
@@ -355,25 +400,27 @@ function getHostOS() {
355
400
  return 'linux';
356
401
  }
357
402
 
358
- function buildProxyConfig() {
359
- const { host, port, username, password } = CONFIG.proxy;
360
-
361
- if (!host || !port) {
362
- log('info', 'no proxy configured');
363
- return null;
364
- }
365
-
366
- log('info', 'proxy configured', { host, port });
367
- return {
368
- server: `http://${host}:${port}`,
369
- username,
370
- password,
371
- };
403
+ // Proxy strategy for outbound browsing.
404
+ const proxyPool = createProxyPool(CONFIG.proxy);
405
+
406
+ if (proxyPool) {
407
+ log('info', 'proxy pool created', {
408
+ mode: proxyPool.mode,
409
+ host: proxyPool.canRotateSessions ? CONFIG.proxy.backconnectHost : CONFIG.proxy.host,
410
+ ports: proxyPool.canRotateSessions ? [CONFIG.proxy.backconnectPort] : CONFIG.proxy.ports,
411
+ poolSize: proxyPool.size,
412
+ country: CONFIG.proxy.country || null,
413
+ state: CONFIG.proxy.state || null,
414
+ city: CONFIG.proxy.city || null,
415
+ });
416
+ } else {
417
+ log('info', 'no proxy configured');
372
418
  }
373
419
 
374
420
  const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
375
421
  let browserIdleTimer = null;
376
422
  let browserLaunchPromise = null;
423
+ let browserWarmRetryTimer = null;
377
424
 
378
425
  function scheduleBrowserIdleShutdown() {
379
426
  clearBrowserIdleTimer();
@@ -396,6 +443,21 @@ function clearBrowserIdleTimer() {
396
443
  }
397
444
  }
398
445
 
/**
 * Schedule a background attempt to launch the browser after `delayMs`.
 *
 * Does nothing when a retry is already pending, a browser already exists, or
 * a launch is in flight. When the attempt fails, another retry is scheduled
 * with the delay doubled, capped at 30000 ms.
 *
 * @param {number} [delayMs=5000] - Delay before this attempt, in milliseconds.
 */
function scheduleBrowserWarmRetry(delayMs = 5000) {
  if (browserWarmRetryTimer || browser || browserLaunchPromise) return;
  browserWarmRetryTimer = setTimeout(async () => {
    // Clear the handle first so a failure below is allowed to reschedule.
    browserWarmRetryTimer = null;
    const start = Date.now();
    try {
      await ensureBrowser();
      log('info', 'background browser warm retry succeeded', { ms: Date.now() - start });
    } catch (err) {
      // Log the delay that will actually be used for the next attempt,
      // not the one that just elapsed.
      const nextDelayMs = Math.min(delayMs * 2, 30000);
      log('warn', 'background browser warm retry failed', {
        error: err?.message ?? String(err),
        nextDelayMs,
      });
      scheduleBrowserWarmRetry(nextDelayMs);
    }
  }, delayMs);
}
460
+
399
461
  // --- Browser health tracking ---
400
462
  const healthState = {
401
463
  consecutiveNavFailures: 0,
@@ -417,6 +479,7 @@ function recordNavFailure() {
417
479
  async function restartBrowser(reason) {
418
480
  if (healthState.isRecovering) return;
419
481
  healthState.isRecovering = true;
482
+ browserRestartsTotal.labels(reason).inc();
420
483
  log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });
421
484
  try {
422
485
  for (const [, session] of sessions) {
@@ -449,29 +512,157 @@ function getTotalTabCount() {
449
512
  return total;
450
513
  }
451
514
 
515
+ // Virtual display for WebGL support and anti-detection.
516
+ // Xvfb gives Firefox a real X display with GLX, enabling software-rendered WebGL
517
+ // via Mesa llvmpipe. Without this, WebGL returns "no context" — a massive bot signal.
518
+ let virtualDisplay = null;
519
+ let browserLaunchProxy = null;
520
+
/**
 * Check whether a freshly launched browser can reach Google Search.
 *
 * Opens a throwaway context, loads the Google home page, then a sample
 * search, and reports whether the resulting page is an unblocked SERP.
 * The context is closed on every path; errors from closing it are ignored.
 * Navigation errors are not caught here and propagate to the caller.
 *
 * @param {object} candidateBrowser - Browser to probe; must provide `newContext()`.
 * @returns {Promise<{ok: boolean, url: string, blocked: boolean}>}
 */
async function probeGoogleSearch(candidateBrowser) {
  const navOptions = { waitUntil: 'domcontentloaded', timeout: 30000 };
  let probeContext = null;
  try {
    probeContext = await candidateBrowser.newContext({
      viewport: { width: 1280, height: 720 },
      permissions: ['geolocation'],
    });
    const probePage = await probeContext.newPage();

    // Visit the home page first, pause, then run the search.
    await probePage.goto('https://www.google.com/', { ...navOptions });
    await probePage.waitForTimeout(1200);
    await probePage.goto('https://www.google.com/search?q=weather%20today', { ...navOptions });
    await probePage.waitForTimeout(4000);

    const blocked = await isGoogleSearchBlocked(probePage);
    const ok = !blocked && isGoogleSerp(probePage.url());
    return { ok, url: probePage.url(), blocked };
  } finally {
    await probeContext?.close().catch(() => {});
  }
}
544
+
/**
 * Wrap `candidateBrowser.close` so closing the browser also releases the
 * resources tied to that launch.
 *
 * Cleanup runs in a `finally`, so the virtual display is killed even when the
 * underlying close rejects; the rejection still propagates to the caller.
 * The display is killed at most once, however many times `close()` is called.
 * Module-level `browserLaunchProxy` / `virtualDisplay` are only reset when
 * they still refer to this launch, so closing an older browser cannot wipe
 * the state of a newer one.
 *
 * @param {object} candidateBrowser - Browser whose `close` method is wrapped in place.
 * @param {object|null} localVirtualDisplay - Display started for this launch, if any; must provide `kill()`.
 */
function attachBrowserCleanup(candidateBrowser, localVirtualDisplay) {
  const origClose = candidateBrowser.close.bind(candidateBrowser);
  // The launch proxy recorded at attach time belongs to this browser.
  const launchProxyAtAttach = browserLaunchProxy;
  let displayKilled = false;

  candidateBrowser.close = async (...args) => {
    try {
      await origClose(...args);
    } finally {
      if (browserLaunchProxy === launchProxyAtAttach) browserLaunchProxy = null;
      if (localVirtualDisplay && !displayKilled) {
        displayKilled = true;
        localVirtualDisplay.kill();
        if (virtualDisplay === localVirtualDisplay) virtualDisplay = null;
      }
    }
  };
}
556
+
452
557
  async function launchBrowserInstance() {
453
558
  const hostOS = getHostOS();
454
- const proxy = buildProxyConfig();
455
-
456
- log('info', 'launching camoufox', { hostOS, geoip: !!proxy });
457
-
458
- const options = await launchOptions({
459
- headless: true,
460
- os: hostOS,
461
- humanize: true,
462
- enable_cache: true,
463
- proxy: proxy,
464
- geoip: !!proxy,
465
- });
466
-
467
- browser = await firefox.launch(options);
468
- log('info', 'camoufox launched');
469
- return browser;
559
+ const maxAttempts = proxyPool?.launchRetries ?? 1;
560
+ let lastError = null;
561
+
562
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
563
+ const launchProxy = proxyPool
564
+ ? proxyPool.getLaunchProxy(proxyPool.canRotateSessions ? `browser-${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}` : undefined)
565
+ : null;
566
+
567
+ let localVirtualDisplay = null;
568
+ let vdDisplay = undefined;
569
+ let candidateBrowser = null;
570
+
571
+ try {
572
+ if (os.platform() === 'linux') {
573
+ localVirtualDisplay = new VirtualDisplay();
574
+ vdDisplay = localVirtualDisplay.get();
575
+ log('info', 'xvfb virtual display started', { display: vdDisplay, attempt });
576
+ }
577
+ } catch (err) {
578
+ log('warn', 'xvfb not available, falling back to headless', { error: err.message, attempt });
579
+ localVirtualDisplay = null;
580
+ }
581
+
582
+ const useVirtualDisplay = !!vdDisplay;
583
+ log('info', 'launching camoufox', {
584
+ hostOS,
585
+ attempt,
586
+ maxAttempts,
587
+ geoip: !!launchProxy,
588
+ proxyMode: proxyPool?.mode || null,
589
+ proxyServer: launchProxy?.server || null,
590
+ proxySession: launchProxy?.sessionId || null,
591
+ proxyPoolSize: proxyPool?.size || 0,
592
+ virtualDisplay: useVirtualDisplay,
593
+ });
594
+
595
+ try {
596
+ const options = await launchOptions({
597
+ headless: useVirtualDisplay ? false : true,
598
+ os: hostOS,
599
+ humanize: true,
600
+ enable_cache: true,
601
+ proxy: launchProxy,
602
+ geoip: !!launchProxy,
603
+ virtual_display: vdDisplay,
604
+ });
605
+ options.proxy = normalizePlaywrightProxy(options.proxy);
606
+
607
+ candidateBrowser = await firefox.launch(options);
608
+
609
+ if (proxyPool?.canRotateSessions) {
610
+ const probe = await probeGoogleSearch(candidateBrowser);
611
+ if (!probe.ok) {
612
+ log('warn', 'browser launch google probe failed', {
613
+ attempt,
614
+ maxAttempts,
615
+ proxySession: launchProxy?.sessionId || null,
616
+ url: probe.url,
617
+ });
618
+ if (attempt < maxAttempts) {
619
+ await candidateBrowser.close().catch(() => {});
620
+ if (localVirtualDisplay) localVirtualDisplay.kill();
621
+ continue;
622
+ }
623
+ // Last attempt: accept browser in degraded mode rather than death-spiraling.
624
+ // Non-Google sites will still work; Google requests will get blocked responses.
625
+ log('error', 'all proxy sessions Google-blocked, accepting browser in degraded mode', {
626
+ maxAttempts,
627
+ proxySession: launchProxy?.sessionId || null,
628
+ });
629
+ }
630
+ }
631
+
632
+ virtualDisplay = localVirtualDisplay;
633
+ browserLaunchProxy = launchProxy;
634
+ browser = candidateBrowser;
635
+ attachBrowserCleanup(browser, localVirtualDisplay);
636
+
637
+ log('info', 'camoufox launched', {
638
+ attempt,
639
+ maxAttempts,
640
+ virtualDisplay: useVirtualDisplay,
641
+ proxyMode: proxyPool?.mode || null,
642
+ proxyServer: launchProxy?.server || null,
643
+ proxySession: launchProxy?.sessionId || null,
644
+ });
645
+ return browser;
646
+ } catch (err) {
647
+ lastError = err;
648
+ log('warn', 'camoufox launch attempt failed', {
649
+ attempt,
650
+ maxAttempts,
651
+ error: err.message,
652
+ proxySession: launchProxy?.sessionId || null,
653
+ });
654
+ await candidateBrowser?.close().catch(() => {});
655
+ if (localVirtualDisplay) localVirtualDisplay.kill();
656
+ }
657
+ }
658
+
659
+ throw lastError || new Error('Failed to launch a usable browser');
470
660
  }
471
661
 
472
662
  async function ensureBrowser() {
473
663
  clearBrowserIdleTimer();
474
664
  if (browser && !browser.isConnected()) {
665
+ failuresTotal.labels('browser_disconnected', 'internal').inc();
475
666
  log('warn', 'browser disconnected, clearing dead sessions and relaunching', {
476
667
  deadSessions: sessions.size,
477
668
  });
@@ -479,13 +670,20 @@ async function ensureBrowser() {
479
670
  await session.context.close().catch(() => {});
480
671
  }
481
672
  sessions.clear();
673
+ // Clean up virtual display from dead browser before relaunching
674
+ if (virtualDisplay) {
675
+ virtualDisplay.kill();
676
+ virtualDisplay = null;
677
+ }
678
+ browserLaunchProxy = null;
482
679
  browser = null;
483
680
  }
484
681
  if (browser) return browser;
485
682
  if (browserLaunchPromise) return browserLaunchPromise;
683
+ const launchTimeoutMs = proxyPool?.launchTimeoutMs ?? 60000;
486
684
  browserLaunchPromise = Promise.race([
487
685
  launchBrowserInstance(),
488
- new Promise((_, reject) => setTimeout(() => reject(new Error('Browser launch timeout (30s)')), 30000)),
686
+ new Promise((_, reject) => setTimeout(() => reject(new Error(`Browser launch timeout (${Math.round(launchTimeoutMs / 1000)}s)`)), launchTimeoutMs)),
489
687
  ]).finally(() => { browserLaunchPromise = null; });
490
688
  return browserLaunchPromise;
491
689
  }
@@ -528,11 +726,26 @@ async function getSession(userId) {
528
726
  contextOptions.timezoneId = 'America/Los_Angeles';
529
727
  contextOptions.geolocation = { latitude: 37.7749, longitude: -122.4194 };
530
728
  }
729
+ let sessionProxy = null;
730
+ if (proxyPool?.canRotateSessions) {
731
+ sessionProxy = proxyPool.getNext(`ctx-${key}-${crypto.randomUUID().replace(/-/g, '').slice(0, 8)}`);
732
+ contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
733
+ log('info', 'session proxy assigned', { userId: key, sessionId: sessionProxy.sessionId });
734
+ } else if (proxyPool) {
735
+ sessionProxy = proxyPool.getNext();
736
+ contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
737
+ log('info', 'session proxy assigned', { userId: key, proxy: sessionProxy.server });
738
+ }
531
739
  const context = await b.newContext(contextOptions);
532
740
 
533
- session = { context, tabGroups: new Map(), lastAccess: Date.now() };
741
+ session = { context, tabGroups: new Map(), lastAccess: Date.now(), proxySessionId: sessionProxy?.sessionId || null };
534
742
  sessions.set(key, session);
535
- log('info', 'session created', { userId: key });
743
+ log('info', 'session created', {
744
+ userId: key,
745
+ proxyMode: proxyPool?.mode || null,
746
+ proxyServer: sessionProxy?.server || browserLaunchProxy?.server || null,
747
+ proxySession: sessionProxy?.sessionId || browserLaunchProxy?.sessionId || null,
748
+ });
536
749
  }
537
750
  session.lastAccess = Date.now();
538
751
  return session;
@@ -571,11 +784,30 @@ function isTabDestroyedError(err) {
571
784
 
572
785
  // Centralized error handler for route catch blocks.
573
786
  // Auto-destroys dead browser sessions and returns appropriate status codes.
/**
 * Report whether an error's message looks like a proxy connection failure.
 *
 * Matches messages containing `NS_ERROR_PROXY`, `proxy connection` or
 * `Proxy connection`. A non-string `message` is coerced to a string instead
 * of throwing, so this is safe to call from an error handler.
 *
 * @param {*} err - Value caught from a failed operation.
 * @returns {boolean} True when the message matches a proxy failure marker.
 */
function isProxyError(err) {
  if (!err) return false;
  const rawMessage = err.message;
  const msg = typeof rawMessage === 'string' ? rawMessage : String(rawMessage ?? '');
  return /NS_ERROR_PROXY|[Pp]roxy connection/.test(msg);
}
792
+
574
793
  function handleRouteError(err, req, res, extraFields = {}) {
794
+ const failureType = classifyError(err);
795
+ const action = actionFromReq(req);
796
+ failuresTotal.labels(failureType, action).inc();
797
+
575
798
  const userId = req.body?.userId || req.query?.userId;
576
799
  if (userId && isDeadContextError(err)) {
577
800
  destroySession(userId);
578
801
  }
802
+ // Proxy errors mean the session is dead — rotate at context level.
803
+ // Destroy the user's session so the next request gets a fresh context with a new proxy.
804
+ if (isProxyError(err) && proxyPool?.canRotateSessions && userId) {
805
+ log('warn', 'proxy error detected, destroying user session for fresh proxy on next request', {
806
+ action, userId, error: err.message,
807
+ });
808
+ browserRestartsTotal.labels('proxy_error').inc();
809
+ destroySession(userId);
810
+ }
579
811
  // Track consecutive timeouts per tab and auto-destroy stuck tabs
580
812
  if (userId && isTimeoutError(err)) {
581
813
  const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
@@ -586,7 +818,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
586
818
  found.tabState.consecutiveTimeouts++;
587
819
  if (found.tabState.consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
588
820
  log('warn', 'auto-destroying tab after consecutive timeouts', { tabId, count: found.tabState.consecutiveTimeouts });
589
- destroyTab(session, tabId);
821
+ destroyTab(session, tabId, 'consecutive_timeouts');
590
822
  }
591
823
  }
592
824
  }
@@ -596,7 +828,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
596
828
  const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
597
829
  const session = sessions.get(normalizeUserId(userId));
598
830
  if (session && tabId) {
599
- destroyTab(session, tabId);
831
+ destroyTab(session, tabId, 'lock_queue');
600
832
  }
601
833
  return res.status(503).json({ error: 'Tab unresponsive and has been destroyed. Open a new tab.', ...extraFields });
602
834
  }
@@ -607,25 +839,61 @@ function handleRouteError(err, req, res, extraFields = {}) {
607
839
  sendError(res, err, extraFields);
608
840
  }
609
841
 
610
- function destroyTab(session, tabId) {
842
+ function destroyTab(session, tabId, reason) {
611
843
  const lock = tabLocks.get(tabId);
612
844
  if (lock) {
613
845
  lock.drain();
614
846
  tabLocks.delete(tabId);
847
+ refreshTabLockQueueDepth();
615
848
  }
616
849
  for (const [listItemId, group] of session.tabGroups) {
617
850
  if (group.has(tabId)) {
618
851
  const tabState = group.get(tabId);
619
- log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls });
852
+ log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls, reason: reason || 'unknown' });
620
853
  safePageClose(tabState.page);
621
854
  group.delete(tabId);
622
855
  if (group.size === 0) session.tabGroups.delete(listItemId);
856
+ refreshActiveTabsGauge();
857
+ if (reason) tabsDestroyedTotal.labels(reason).inc();
623
858
  return true;
624
859
  }
625
860
  }
626
861
  return false;
627
862
  }
628
863
 
/**
 * Recycle the least-used tab in a session to free a slot.
 *
 * "Least-used" is the tab with the fewest recorded `toolCalls`; on a tie the
 * first tab met in iteration order is chosen. The tab is removed from its
 * group (and the group dropped if it becomes empty), its lock is drained and
 * deleted, and its page is closed.
 *
 * Bookkeeping is updated before the page close is awaited, so requests that
 * run during the close can no longer look up the tab being recycled.
 *
 * @param {object} session - Session whose `tabGroups` map is searched.
 * @param {string} reqId - Request id, used for logging only.
 * @returns {Promise<{recycledTabId: *, recycledFromGroup: *}|null>}
 *   Identifiers of the recycled tab, or null if the session has no tabs.
 */
async function recycleOldestTab(session, reqId) {
  let victim = null;
  for (const [groupKey, group] of session.tabGroups) {
    for (const [tabId, tabState] of group) {
      if (!victim || tabState.toolCalls < victim.tabState.toolCalls) {
        victim = { groupKey, group, tabId, tabState };
      }
    }
  }
  if (!victim) return null;

  const { groupKey, group, tabId, tabState } = victim;

  // Unregister synchronously, before any await.
  group.delete(tabId);
  if (group.size === 0) session.tabGroups.delete(groupKey);
  const lock = tabLocks.get(tabId);
  if (lock) {
    lock.drain();
    tabLocks.delete(tabId);
  }
  refreshTabLockQueueDepth();
  // The tab count changed, so keep the gauge in step (as destroyTab does).
  refreshActiveTabsGauge();

  await safePageClose(tabState.page);

  tabsRecycledTotal.inc();
  log('info', 'tab recycled (limit reached)', { reqId, recycledTabId: tabId, recycledFromGroup: groupKey });
  return { recycledTabId: tabId, recycledFromGroup: groupKey };
}
896
+
629
897
  function destroySession(userId) {
630
898
  const key = normalizeUserId(userId);
631
899
  const session = sessions.get(key);
@@ -654,13 +922,89 @@ function createTabState(page) {
654
922
  toolCalls: 0,
655
923
  consecutiveTimeouts: 0,
656
924
  lastSnapshot: null,
925
+ lastRequestedUrl: null,
926
+ googleRetryCount: 0,
657
927
  };
658
928
  }
659
929
 
/**
 * Report whether the page is showing a connection / proxy failure screen.
 *
 * Reads up to the first 600 characters of the page body text and looks for
 * known failure phrases. Returns false for a missing or closed page, and
 * treats a failed evaluation as empty text.
 *
 * @param {object} page - Page to inspect; must provide `isClosed()` and `evaluate()`.
 * @returns {Promise<boolean>}
 */
async function isGoogleUnavailable(page) {
  if (!page) return false;
  if (page.isClosed()) return false;

  const unavailablePattern = /Unable to connect|502 Bad Gateway or Proxy Error|Camoufox can’t establish a connection/;
  const bodyText = await page
    .evaluate(() => document.body?.innerText?.slice(0, 600) || '')
    .catch(() => '');
  return unavailablePattern.test(bodyText);
}
935
+
/**
 * Replay a tab's last Google search on a fresh browser context.
 *
 * Returns null without doing anything when the previous tab state has no
 * Google search URL recorded, or when it has already been retried 3 times.
 * Otherwise the user's current session context is closed and dropped, a new
 * session is obtained through `getSession`, and a new page is registered
 * under the same `tabId`. That page loads the Google home page, then the
 * recorded search URL.
 *
 * NOTE(review): closing the old context presumably invalidates every other
 * tab that belonged to that session, and their tab locks are not drained
 * here — confirm this is intended.
 *
 * @param {string} userId - User whose session is rotated.
 * @param {string} sessionKey - Key of the tab group that receives the new tab.
 * @param {string} tabId - Tab id to re-register with the new page.
 * @param {object} previousTabState - State of the tab being replaced.
 * @param {string} reason - Label recorded on the restart counter.
 * @param {string} reqId - Request id, used for logging only.
 * @returns {Promise<{session: object, tabState: object}|null>}
 */
async function rotateGoogleTab(userId, sessionKey, tabId, previousTabState, reason, reqId) {
  const hasReplayableSearch =
    Boolean(previousTabState?.lastRequestedUrl) && isGoogleSearchUrl(previousTabState.lastRequestedUrl);
  if (!hasReplayableSearch) return null;
  if ((previousTabState.googleRetryCount || 0) >= 3) return null;

  // Recorded on the restart counter, although only this user's context is replaced.
  browserRestartsTotal.labels(reason).inc();

  // Drop the user's current context; getSession() below builds a new one.
  const normalizedUser = normalizeUserId(userId);
  const staleSession = sessions.get(normalizedUser);
  if (staleSession) {
    await staleSession.context.close().catch(() => {});
    sessions.delete(normalizedUser);
  }

  const session = await getSession(userId);
  const group = getTabGroup(session, sessionKey);
  const page = await session.context.newPage();
  const tabState = createTabState(page);
  tabState.googleRetryCount = (previousTabState.googleRetryCount || 0) + 1;
  tabState.lastRequestedUrl = previousTabState.lastRequestedUrl;
  attachDownloadListener(tabState, tabId, log);
  group.set(tabId, tabState);
  refreshActiveTabsGauge();

  log('warn', 'replaying google search on fresh context (per-context proxy rotation)', {
    reqId,
    tabId,
    retryCount: tabState.googleRetryCount,
    url: tabState.lastRequestedUrl,
    proxySession: session.proxySessionId || null,
  });

  const gotoTimed = (targetUrl) =>
    withPageLoadDuration('navigate', () =>
      page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));

  // Visit the home page first, pause, then replay the search.
  await gotoTimed('https://www.google.com/');
  tabState.visitedUrls.add('https://www.google.com/');
  await page.waitForTimeout(1200);
  await gotoTimed(tabState.lastRequestedUrl);
  tabState.visitedUrls.add(tabState.lastRequestedUrl);
  return { session, tabState };
}
975
+
/** Set the active-tabs gauge to the current total tab count. */
function refreshActiveTabsGauge() {
  const openTabs = getTotalTabCount();
  activeTabsGauge.set(openTabs);
}
979
+
/**
 * Set the lock-queue-depth gauge to the number of waiters queued across all
 * tab locks. Entries without a `queue` contribute nothing.
 */
function refreshTabLockQueueDepth() {
  const queued = [...tabLocks.values()].reduce(
    (sum, lock) => (lock?.queue ? sum + lock.queue.length : sum),
    0,
  );
  tabLockQueueDepth.set(queued);
}
987
+
/**
 * Run `fn` and record how long it took on the page-load duration metric.
 * The timer is stopped whether `fn` resolves or throws.
 *
 * NOTE(review): `action` is accepted but not passed to the timer — confirm
 * whether the metric is meant to be labelled by action.
 *
 * @param {string} action - Name of the operation being timed (currently unused).
 * @param {Function} fn - Operation to run; may return a promise.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
async function withPageLoadDuration(action, fn) {
  const stopTimer = pageLoadDuration.startTimer();
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    stopTimer();
  }
}
996
+
660
997
 
661
998
 
662
999
  async function waitForPageReady(page, options = {}) {
663
- const { timeout = 10000, waitForNetwork = true } = options;
1000
+ const {
1001
+ timeout = 10000,
1002
+ waitForNetwork = true,
1003
+ waitForHydration = true,
1004
+ settleMs = 200,
1005
+ hydrationPollMs = 250,
1006
+ hydrationTimeoutMs = Math.min(timeout, 10000),
1007
+ } = options;
664
1008
 
665
1009
  try {
666
1010
  await page.waitForLoadState('domcontentloaded', { timeout });
@@ -671,27 +1015,28 @@ async function waitForPageReady(page, options = {}) {
671
1015
  });
672
1016
  }
673
1017
 
674
- // Framework hydration wait (React/Next.js/Vue) - mirrors Swift WebView.swift logic
675
- // Wait for readyState === 'complete' + network quiet (40 iterations × 250ms max)
676
- await page.evaluate(async () => {
677
- for (let i = 0; i < 40; i++) {
678
- // Check if network is quiet (no recent resource loads)
679
- const entries = performance.getEntriesByType('resource');
680
- const recentEntries = entries.slice(-5);
681
- const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
682
-
683
- if (document.readyState === 'complete' && netQuiet) {
684
- // Double RAF to ensure paint is complete
685
- await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
686
- break;
1018
+ if (waitForHydration) {
1019
+ const maxIterations = Math.max(1, Math.floor(hydrationTimeoutMs / hydrationPollMs));
1020
+ await page.evaluate(async ({ maxIterations, hydrationPollMs }) => {
1021
+ for (let i = 0; i < maxIterations; i++) {
1022
+ const entries = performance.getEntriesByType('resource');
1023
+ const recentEntries = entries.slice(-5);
1024
+ const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
1025
+
1026
+ if (document.readyState === 'complete' && netQuiet) {
1027
+ await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
1028
+ break;
1029
+ }
1030
+ await new Promise(r => setTimeout(r, hydrationPollMs));
687
1031
  }
688
- await new Promise(r => setTimeout(r, 250));
689
- }
690
- }).catch(() => {
691
- log('warn', 'hydration wait failed, continuing');
692
- });
1032
+ }, { maxIterations, hydrationPollMs }).catch(() => {
1033
+ log('warn', 'hydration wait failed, continuing');
1034
+ });
1035
+ }
693
1036
 
694
- await page.waitForTimeout(200);
1037
+ if (settleMs > 0) {
1038
+ await page.waitForTimeout(settleMs);
1039
+ }
695
1040
 
696
1041
  // Auto-dismiss common consent/privacy dialogs
697
1042
  await dismissConsentDialogs(page);
@@ -758,6 +1103,25 @@ function isGoogleSerp(url) {
758
1103
  }
759
1104
  }
760
1105
 
/**
 * Report whether `url` is a Google Search results URL.
 *
 * The host must be `google.<tld>` or a subdomain of it, where `<tld>` is
 * `com`, a 2–3 letter suffix, or a `co.xx` / `com.xx` suffix, and the path
 * must be exactly `/search`. Matching the whole hostname stops look-alike
 * hosts such as `notgoogle.com` or `google.evil.example` from being treated
 * as Google.
 *
 * @param {string} url - URL to test.
 * @returns {boolean} False for unparsable input.
 */
function isGoogleSearchUrl(url) {
  const googleHost = /^(?:[a-z0-9-]+\.)*google\.(?:com|[a-z]{2,3}|com?\.[a-z]{2})$/;
  try {
    const parsed = new URL(url);
    return googleHost.test(parsed.hostname) && parsed.pathname === '/search';
  } catch {
    return false;
  }
}
1114
+
/**
 * Report whether the page is a Google "unusual traffic" block page.
 *
 * A URL containing `google.com/sorry/` counts as blocked straight away.
 * Otherwise up to the first 600 characters of the body text are checked for
 * known block-page phrases. Returns false for a missing or closed page, and
 * treats a failed evaluation as empty text.
 *
 * @param {object} page - Page to inspect; must provide `isClosed()`, `url()` and `evaluate()`.
 * @returns {Promise<boolean>}
 */
async function isGoogleSearchBlocked(page) {
  if (!page) return false;
  if (page.isClosed()) return false;

  if (page.url().includes('google.com/sorry/')) return true;

  const blockedPattern = /Our systems have detected unusual traffic|About this page|If you're having trouble accessing Google Search|SG_REL/;
  const bodyText = await page
    .evaluate(() => document.body?.innerText?.slice(0, 600) || '')
    .catch(() => '');
  return blockedPattern.test(bodyText);
}
1124
+
761
1125
  // --- Google SERP: combined extraction (refs + snapshot in one DOM pass) ---
762
1126
  // Returns { refs: Map, snapshot: string }
763
1127
  async function extractGoogleSerp(page) {
@@ -898,6 +1262,8 @@ async function extractGoogleSerp(page) {
898
1262
  return { refs, snapshot: extracted.snapshot };
899
1263
  }
900
1264
 
1265
+ const REFRESH_READY_TIMEOUT_MS = 2500;
1266
+
901
1267
  async function buildRefs(page) {
902
1268
  const refs = new Map();
903
1269
 
@@ -916,16 +1282,20 @@ async function buildRefs(page) {
916
1282
  const start = Date.now();
917
1283
 
918
1284
  // Hard total timeout on the entire buildRefs operation
919
- const timeoutPromise = new Promise((_, reject) =>
920
- setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS)
921
- );
1285
+ let timerId;
1286
+ const timeoutPromise = new Promise((_, reject) => {
1287
+ timerId = setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS);
1288
+ });
922
1289
 
923
1290
  try {
924
- return await Promise.race([
1291
+ const result = await Promise.race([
925
1292
  _buildRefsInner(page, refs, start),
926
1293
  timeoutPromise
927
1294
  ]);
1295
+ clearTimeout(timerId);
1296
+ return result;
928
1297
  } catch (err) {
1298
+ clearTimeout(timerId);
929
1299
  if (err.message === 'buildRefs_timeout') {
930
1300
  log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
931
1301
  return refs;
@@ -935,7 +1305,12 @@ async function buildRefs(page) {
935
1305
  }
936
1306
 
937
1307
  async function _buildRefsInner(page, refs, start) {
938
- await waitForPageReady(page, { waitForNetwork: false });
1308
+ await waitForPageReady(page, {
1309
+ timeout: REFRESH_READY_TIMEOUT_MS,
1310
+ waitForNetwork: false,
1311
+ waitForHydration: false,
1312
+ settleMs: 100,
1313
+ });
939
1314
 
940
1315
  // Budget remaining time for ariaSnapshot
941
1316
  const elapsed = Date.now() - start;
@@ -1004,7 +1379,12 @@ async function getAriaSnapshot(page) {
1004
1379
  if (!page || page.isClosed()) {
1005
1380
  return null;
1006
1381
  }
1007
- await waitForPageReady(page, { waitForNetwork: false });
1382
+ await waitForPageReady(page, {
1383
+ timeout: REFRESH_READY_TIMEOUT_MS,
1384
+ waitForNetwork: false,
1385
+ waitForHydration: false,
1386
+ settleMs: 100,
1387
+ });
1008
1388
  try {
1009
1389
  return await page.locator('body').ariaSnapshot({ timeout: 5000 });
1010
1390
  } catch (err) {
@@ -1027,11 +1407,46 @@ function refToLocator(page, ref, refs) {
1027
1407
  return locator;
1028
1408
  }
1029
1409
 
/**
 * Rebuild the element refs for a tab, optionally under a deadline.
 *
 * When `timeoutMs` is set and `buildRefs` has not settled in time, the
 * returned promise rejects with an error whose message is
 * `<reason>_refs_timeout`. The deadline timer is always cleared once the race
 * settles, so a successful refresh leaves no pending timer behind.
 *
 * When the rebuild returns no refs but the tab already had some and the page
 * URL did not change during the rebuild, the previous refs are returned
 * instead (unless `preserveExistingOnEmpty` is false).
 *
 * @param {object} tabState - Tab state; reads `page` and `refs`.
 * @param {object} [options]
 * @param {string} [options.reason='refresh'] - Used in the timeout message and in logs.
 * @param {number|null} [options.timeoutMs=null] - Deadline in milliseconds; falsy means no deadline.
 * @param {boolean} [options.preserveExistingOnEmpty=true] - Keep previous refs after an empty rebuild on the same URL.
 * @returns {Promise<Map>} Refs to use for the tab.
 */
async function refreshTabRefs(tabState, options = {}) {
  const {
    reason = 'refresh',
    timeoutMs = null,
    preserveExistingOnEmpty = true,
  } = options;

  const beforeUrl = tabState.page?.url?.() || '';
  const existingRefs = tabState.refs instanceof Map ? tabState.refs : new Map();
  const refreshPromise = buildRefs(tabState.page);

  let refreshedRefs;
  if (timeoutMs) {
    const timeoutLabel = `${reason}_refs_timeout`;
    let timerId;
    const timeoutPromise = new Promise((_, reject) => {
      timerId = setTimeout(() => reject(new Error(timeoutLabel)), timeoutMs);
    });
    try {
      refreshedRefs = await Promise.race([refreshPromise, timeoutPromise]);
    } finally {
      // Without this the timer would stay pending for timeoutMs after every refresh.
      clearTimeout(timerId);
    }
  } else {
    refreshedRefs = await refreshPromise;
  }

  const afterUrl = tabState.page?.url?.() || beforeUrl;
  const rebuildCameBackEmpty = refreshedRefs.size === 0 && existingRefs.size > 0;
  if (preserveExistingOnEmpty && rebuildCameBackEmpty && beforeUrl === afterUrl) {
    log('warn', 'preserving previous refs after empty rebuild', {
      reason,
      url: afterUrl,
      previousRefs: existingRefs.size,
    });
    return existingRefs;
  }

  return refreshedRefs;
}
1444
+
1030
1445
  // --- YouTube transcript ---
1031
1446
  // Implementation extracted to lib/youtube.js to avoid scanner false positives
1032
1447
  // (child_process + app.post in same file triggers OpenClaw skill-scanner)
1033
1448
 
1034
- detectYtDlp(log);
1449
+ await detectYtDlp(log);
1035
1450
 
1036
1451
  app.post('/youtube/transcript', async (req, res) => {
1037
1452
  const reqId = req.reqId;
@@ -1051,14 +1466,23 @@ app.post('/youtube/transcript', async (req, res) => {
1051
1466
  const videoId = videoIdMatch[1];
1052
1467
  const lang = languages[0] || 'en';
1053
1468
 
1054
- log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser' });
1469
+ // Re-detect yt-dlp if startup detection failed (transient issue)
1470
+ await ensureYtDlp(log);
1471
+
1472
+ const ytDlpProxyUrl = buildProxyUrl(proxyPool, CONFIG.proxy);
1473
+ log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser', hasProxy: !!ytDlpProxyUrl });
1055
1474
 
1056
1475
  let result;
1057
1476
  if (hasYtDlp()) {
1058
1477
  try {
1059
- result = await ytDlpTranscript(reqId, url, videoId, lang);
1478
+ result = await ytDlpTranscript(reqId, url, videoId, lang, ytDlpProxyUrl);
1060
1479
  } catch (ytErr) {
1061
- log('warn', 'yt-dlp failed, falling back to browser', { reqId, error: ytErr.message });
1480
+ log('warn', 'yt-dlp threw, falling back to browser', { reqId, error: ytErr.message });
1481
+ result = null;
1482
+ }
1483
+ // If yt-dlp returned an error result (e.g. no captions) or threw, try browser
1484
+ if (!result || result.status !== 'ok') {
1485
+ if (result) log('warn', 'yt-dlp returned error, falling back to browser', { reqId, status: result.status, code: result.code });
1062
1486
  result = await browserTranscript(reqId, url, videoId, lang);
1063
1487
  }
1064
1488
  } else {
@@ -1068,6 +1492,7 @@ app.post('/youtube/transcript', async (req, res) => {
1068
1492
  log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
1069
1493
  res.json(result);
1070
1494
  } catch (err) {
1495
+ failuresTotal.labels(classifyError(err), 'youtube_transcript').inc();
1071
1496
  log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
1072
1497
  res.status(500).json({ error: safeError(err) });
1073
1498
  }
@@ -1186,6 +1611,16 @@ async function browserTranscript(reqId, url, videoId, lang) {
1186
1611
  };
1187
1612
  } finally {
1188
1613
  await safePageClose(page);
1614
+ // Clean up phantom transcript session if no tabs remain
1615
+ const ytSession = sessions.get(normalizeUserId('__yt_transcript__'));
1616
+ if (ytSession) {
1617
+ let totalTabs = 0;
1618
+ for (const g of ytSession.tabGroups.values()) totalTabs += g.size;
1619
+ if (totalTabs === 0) {
1620
+ ytSession.context.close().catch(() => {});
1621
+ sessions.delete(normalizeUserId('__yt_transcript__'));
1622
+ }
1623
+ }
1189
1624
  }
1190
1625
  });
1191
1626
  }
@@ -1195,16 +1630,34 @@ app.get('/health', (req, res) => {
1195
1630
  return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
1196
1631
  }
1197
1632
  const running = browser !== null && (browser.isConnected?.() ?? false);
1633
+ if (proxyPool?.canRotateSessions && !running) {
1634
+ scheduleBrowserWarmRetry();
1635
+ return res.status(503).json({
1636
+ ok: false,
1637
+ engine: 'camoufox',
1638
+ browserConnected: false,
1639
+ browserRunning: false,
1640
+ warming: true,
1641
+ ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1642
+ });
1643
+ }
1198
1644
  res.json({
1199
1645
  ok: true,
1200
1646
  engine: 'camoufox',
1201
1647
  browserConnected: running,
1202
1648
  browserRunning: running,
1203
1649
  activeTabs: getTotalTabCount(),
1650
+ activeSessions: sessions.size,
1204
1651
  consecutiveFailures: healthState.consecutiveNavFailures,
1652
+ ...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
1205
1653
  });
1206
1654
  });
1207
1655
 
1656
// Metrics endpoint: returns the serialised contents of the shared metrics
// registry (presumably a prom-client Registry — confirm in lib/metrics.js).
app.get('/metrics', async (_req, res) => {
  try {
    // Collect first, then set headers, so a failure below can still send a
    // JSON error with its own content type.
    const body = await metricsRegister.metrics();
    res.set('Content-Type', metricsRegister.contentType);
    res.send(body);
  } catch (err) {
    // Handled here like the other routes in this file: a rejection escaping an
    // async handler may otherwise leave the request without a response.
    log('error', 'metrics failed', { error: err.message });
    res.status(500).json({ error: safeError(err) });
  }
});
1660
+
1208
1661
  // Create new tab
1209
1662
  app.post('/tabs', async (req, res) => {
1210
1663
  try {
@@ -1220,32 +1673,35 @@ app.post('/tabs', async (req, res) => {
1220
1673
 
1221
1674
  let totalTabs = 0;
1222
1675
  for (const group of session.tabGroups.values()) totalTabs += group.size;
1223
- if (totalTabs >= MAX_TABS_PER_SESSION) {
1224
- throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1225
- }
1226
1676
 
1227
- if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
1228
- throw Object.assign(new Error('Maximum global tabs reached'), { statusCode: 429 });
1677
+ // Recycle oldest tab when limits are reached instead of rejecting
1678
+ if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
1679
+ const recycled = await recycleOldestTab(session, req.reqId);
1680
+ if (!recycled) {
1681
+ throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1682
+ }
1229
1683
  }
1230
1684
 
1231
1685
  const group = getTabGroup(session, resolvedSessionKey);
1232
1686
 
1233
1687
  const page = await session.context.newPage();
1234
- const tabId = crypto.randomUUID();
1688
+ const tabId = fly.makeTabId();
1235
1689
  const tabState = createTabState(page);
1236
1690
  attachDownloadListener(tabState, tabId);
1237
1691
  group.set(tabId, tabState);
1692
+ refreshActiveTabsGauge();
1238
1693
 
1239
1694
  if (url) {
1240
1695
  const urlErr = validateUrl(url);
1241
1696
  if (urlErr) throw Object.assign(new Error(urlErr), { statusCode: 400 });
1242
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
1697
+ tabState.lastRequestedUrl = url;
1698
+ await withPageLoadDuration('open_url', () => page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1243
1699
  tabState.visitedUrls.add(url);
1244
1700
  }
1245
1701
 
1246
1702
  log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
1247
1703
  return { tabId, url: page.url() };
1248
- })(), HANDLER_TIMEOUT_MS, 'tab create');
1704
+ })(), requestTimeoutMs(), 'tab create');
1249
1705
 
1250
1706
  res.json(result);
1251
1707
  } catch (err) {
@@ -1264,45 +1720,29 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1264
1720
 
1265
1721
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1266
1722
  await ensureBrowser();
1723
+ const resolvedSessionKey = sessionKey || listItemId || 'default';
1267
1724
  let session = sessions.get(normalizeUserId(userId));
1268
1725
  let found = session && findTab(session, tabId);
1269
1726
 
1270
1727
  let tabState;
1271
1728
  if (!found) {
1272
- const resolvedSessionKey = sessionKey || listItemId || 'default';
1273
1729
  session = await getSession(userId);
1274
1730
  let sessionTabs = 0;
1275
1731
  for (const g of session.tabGroups.values()) sessionTabs += g.size;
1276
1732
  if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
1277
- // Reuse oldest tab in session instead of rejecting
1278
- let oldestTab = null;
1279
- let oldestGroup = null;
1280
- let oldestTabId = null;
1281
- for (const [gKey, group] of session.tabGroups) {
1282
- for (const [tid, ts] of group) {
1283
- if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
1284
- oldestTab = ts;
1285
- oldestGroup = group;
1286
- oldestTabId = tid;
1287
- }
1288
- }
1289
- }
1290
- if (oldestTab) {
1291
- tabState = oldestTab;
1292
- const group = getTabGroup(session, resolvedSessionKey);
1293
- if (oldestGroup) oldestGroup.delete(oldestTabId);
1294
- group.set(tabId, tabState);
1295
- { const _l = tabLocks.get(oldestTabId); if (_l) _l.drain(); tabLocks.delete(oldestTabId); }
1296
- log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
1297
- } else {
1733
+ // Recycle oldest tab to free a slot, then create new page
1734
+ const recycled = await recycleOldestTab(session, req.reqId);
1735
+ if (!recycled) {
1298
1736
  throw new Error('Maximum tabs per session reached');
1299
1737
  }
1300
- } else {
1738
+ }
1739
+ {
1301
1740
  const page = await session.context.newPage();
1302
1741
  tabState = createTabState(page);
1303
1742
  attachDownloadListener(tabState, tabId, log);
1304
1743
  const group = getTabGroup(session, resolvedSessionKey);
1305
1744
  group.set(tabId, tabState);
1745
+ refreshActiveTabsGauge();
1306
1746
  log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
1307
1747
  }
1308
1748
  } else {
@@ -1311,7 +1751,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1311
1751
  tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1312
1752
 
1313
1753
  let targetUrl = url;
1314
- if (macro) {
1754
+ if (macro && macro !== '__NO__' && macro !== 'none' && macro !== 'null') {
1315
1755
  targetUrl = expandMacro(macro, query) || url;
1316
1756
  }
1317
1757
 
@@ -1321,9 +1761,61 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1321
1761
  if (urlErr) throw new Error(urlErr);
1322
1762
 
1323
1763
  return await withTabLock(tabId, async () => {
1324
- await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
1325
- tabState.visitedUrls.add(targetUrl);
1326
- tabState.lastSnapshot = null;
1764
+ const currentSessionKey = found?.listItemId || resolvedSessionKey;
1765
+ const isGoogleSearch = isGoogleSearchUrl(targetUrl);
1766
+
1767
+ const navigateCurrentPage = async () => {
1768
+ tabState.lastRequestedUrl = targetUrl;
1769
+ await withPageLoadDuration('navigate', () => tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
1770
+ tabState.visitedUrls.add(targetUrl);
1771
+ tabState.lastSnapshot = null;
1772
+ };
1773
+
1774
+ const prewarmGoogleHome = async () => {
1775
+ if (!isGoogleSearch || tabState.visitedUrls.has('https://www.google.com/')) return;
1776
+ await withPageLoadDuration('navigate', () => tabState.page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }));
1777
+ tabState.visitedUrls.add('https://www.google.com/');
1778
+ await tabState.page.waitForTimeout(1200);
1779
+ };
1780
+
1781
+ const recreateTabOnFreshContext = async () => {
1782
+ const previousRetryCount = tabState.googleRetryCount || 0;
1783
+ browserRestartsTotal.labels('google_search_block').inc();
1784
+ // Rotate at context level — destroy this user's session and create
1785
+ // a fresh one with a new proxy session. Does NOT restart the browser.
1786
+ const key = normalizeUserId(userId);
1787
+ const oldSession = sessions.get(key);
1788
+ if (oldSession) {
1789
+ await oldSession.context.close().catch(() => {});
1790
+ sessions.delete(key);
1791
+ }
1792
+ session = await getSession(userId);
1793
+ const group = getTabGroup(session, currentSessionKey);
1794
+ const page = await session.context.newPage();
1795
+ tabState = createTabState(page);
1796
+ tabState.googleRetryCount = previousRetryCount + 1;
1797
+ attachDownloadListener(tabState, tabId, log);
1798
+ group.set(tabId, tabState);
1799
+ refreshActiveTabsGauge();
1800
+ };
1801
+
1802
+ if (isGoogleSearch && proxyPool?.canRotateSessions) {
1803
+ await prewarmGoogleHome();
1804
+ }
1805
+
1806
+ await navigateCurrentPage();
1807
+
1808
+ if (isGoogleSearch && proxyPool?.canRotateSessions && await isGoogleSearchBlocked(tabState.page)) {
1809
+ log('warn', 'google search blocked, rotating browser proxy session', {
1810
+ reqId: req.reqId,
1811
+ tabId,
1812
+ url: tabState.page.url(),
1813
+ proxySession: browserLaunchProxy?.sessionId || null,
1814
+ });
1815
+ await recreateTabOnFreshContext();
1816
+ await prewarmGoogleHome();
1817
+ await navigateCurrentPage();
1818
+ }
1327
1819
 
1328
1820
  // For Google SERP: skip eager ref building during navigate.
1329
1821
  // Results render asynchronously after DOMContentLoaded — the snapshot
@@ -1332,18 +1824,22 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1332
1824
  tabState.refs = new Map();
1333
1825
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: false, googleSerp: true };
1334
1826
  }
1827
+
1828
+ if (isGoogleSearch && await isGoogleSearchBlocked(tabState.page)) {
1829
+ return { ok: false, tabId, url: tabState.page.url(), refsAvailable: false, googleBlocked: true };
1830
+ }
1335
1831
 
1336
1832
  tabState.refs = await buildRefs(tabState.page);
1337
1833
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
1338
- });
1339
- })(), HANDLER_TIMEOUT_MS, 'navigate'));
1834
+ }, requestTimeoutMs());
1835
+ })(), requestTimeoutMs(), 'navigate'));
1340
1836
 
1341
1837
  log('info', 'navigated', { reqId: req.reqId, tabId, url: result.url });
1342
1838
  res.json(result);
1343
1839
  } catch (err) {
1344
1840
  log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
1345
- const status = err.message && err.message.startsWith('Blocked URL scheme') ? 400 : 500;
1346
- if (status === 400) {
1841
+ const is400 = err.message && (err.message.startsWith('Blocked URL scheme') || err.message === 'url or macro required');
1842
+ if (is400) {
1347
1843
  return res.status(400).json({ error: safeError(err) });
1348
1844
  }
1349
1845
  handleRouteError(err, req, res);
@@ -1377,6 +1873,25 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1377
1873
  }
1378
1874
 
1379
1875
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1876
+ if (proxyPool?.canRotateSessions && isGoogleSearchUrl(tabState.lastRequestedUrl || '')) {
1877
+ const blocked = await isGoogleSearchBlocked(tabState.page);
1878
+ const unavailable = !blocked && await isGoogleUnavailable(tabState.page);
1879
+ if (blocked || unavailable) {
1880
+ const rotated = await rotateGoogleTab(userId, found.listItemId, req.params.tabId, tabState, blocked ? 'google_search_block_snapshot' : 'google_search_unavailable_snapshot', req.reqId);
1881
+ if (rotated) {
1882
+ tabState.page = rotated.tabState.page;
1883
+ tabState.refs = rotated.tabState.refs;
1884
+ tabState.visitedUrls = rotated.tabState.visitedUrls;
1885
+ tabState.downloads = rotated.tabState.downloads;
1886
+ tabState.toolCalls = rotated.tabState.toolCalls;
1887
+ tabState.consecutiveTimeouts = rotated.tabState.consecutiveTimeouts;
1888
+ tabState.lastSnapshot = rotated.tabState.lastSnapshot;
1889
+ tabState.lastRequestedUrl = rotated.tabState.lastRequestedUrl;
1890
+ tabState.googleRetryCount = rotated.tabState.googleRetryCount;
1891
+ }
1892
+ }
1893
+ }
1894
+
1380
1895
  const pageUrl = tabState.page.url();
1381
1896
 
1382
1897
  // Google SERP fast path — DOM extraction instead of ariaSnapshot
@@ -1402,7 +1917,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1402
1917
  return response;
1403
1918
  }
1404
1919
 
1405
- tabState.refs = await buildRefs(tabState.page);
1920
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'snapshot' });
1406
1921
  const ariaYaml = await getAriaSnapshot(tabState.page);
1407
1922
 
1408
1923
  let annotatedYaml = ariaYaml || '';
@@ -1458,7 +1973,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1458
1973
  }
1459
1974
 
1460
1975
  return response;
1461
- })(), HANDLER_TIMEOUT_MS, 'snapshot'));
1976
+ })(), requestTimeoutMs(), 'snapshot'));
1462
1977
 
1463
1978
  log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
1464
1979
  res.json(result);
@@ -1576,9 +2091,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1576
2091
  log('info', 'auto-refreshing refs before click', { ref, hadRefs: tabState.refs.size });
1577
2092
  try {
1578
2093
  const preClickBudget = Math.min(4000, remainingBudget());
1579
- const refreshPromise = buildRefs(tabState.page);
1580
- const refreshBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('pre_click_refs_timeout')), preClickBudget));
1581
- tabState.refs = await Promise.race([refreshPromise, refreshBudget]);
2094
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'pre_click', timeoutMs: preClickBudget });
1582
2095
  } catch (e) {
1583
2096
  if (e.message === 'pre_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1584
2097
  log('warn', 'pre-click buildRefs timed out, proceeding without refresh');
@@ -1618,9 +2131,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1618
2131
  // If it times out, return without refs (caller's next /snapshot will rebuild them).
1619
2132
  const postClickBudget = Math.max(2000, remainingBudget());
1620
2133
  try {
1621
- const refsPromise = buildRefs(tabState.page);
1622
- const refsBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('post_click_refs_timeout')), postClickBudget));
1623
- tabState.refs = await Promise.race([refsPromise, refsBudget]);
2134
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'post_click', timeoutMs: postClickBudget });
1624
2135
  } catch (e) {
1625
2136
  if (e.message === 'post_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1626
2137
  log('warn', 'post-click buildRefs timed out, returning without refs', { budget: postClickBudget, elapsed: Date.now() - clickStart });
@@ -1644,7 +2155,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1644
2155
  const session = sessions.get(normalizeUserId(req.body.userId));
1645
2156
  const found = session && findTab(session, tabId);
1646
2157
  if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1647
- found.tabState.refs = await buildRefs(found.tabState.page);
2158
+ found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'click_timeout' });
1648
2159
  found.tabState.lastSnapshot = null;
1649
2160
  return res.status(500).json({
1650
2161
  error: safeError(err),
@@ -1683,7 +2194,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1683
2194
  let locator = refToLocator(tabState.page, ref, tabState.refs);
1684
2195
  if (!locator) {
1685
2196
  log('info', 'auto-refreshing refs before fill', { ref, hadRefs: tabState.refs.size });
1686
- tabState.refs = await buildRefs(tabState.page);
2197
+ tabState.refs = await refreshTabRefs(tabState, { reason: 'type' });
1687
2198
  locator = refToLocator(tabState.page, ref, tabState.refs);
1688
2199
  }
1689
2200
  if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
@@ -1701,7 +2212,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1701
2212
  const session = sessions.get(normalizeUserId(req.body.userId));
1702
2213
  const found = session && findTab(session, tabId);
1703
2214
  if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1704
- found.tabState.refs = await buildRefs(found.tabState.page);
2215
+ found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'type_timeout' });
1705
2216
  found.tabState.lastSnapshot = null;
1706
2217
  return res.status(500).json({
1707
2218
  error: safeError(err),
@@ -1753,8 +2264,9 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
1753
2264
  const { tabState } = found;
1754
2265
  tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1755
2266
 
1756
- const delta = direction === 'up' ? -amount : amount;
1757
- await tabState.page.mouse.wheel(0, delta);
2267
+ const isVertical = direction === 'up' || direction === 'down';
2268
+ const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
2269
+ await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
1758
2270
  await tabState.page.waitForTimeout(300);
1759
2271
 
1760
2272
  res.json({ ok: true });
@@ -1778,7 +2290,17 @@ app.post('/tabs/:tabId/back', async (req, res) => {
1778
2290
  tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1779
2291
 
1780
2292
  const result = await withTabLock(tabId, async () => {
1781
- await tabState.page.goBack({ timeout: 10000 });
2293
+ try {
2294
+ await tabState.page.goBack({ timeout: 10000 });
2295
+ } catch (navErr) {
2296
+ // NS_BINDING_CANCELLED_OLD_LOAD: Firefox cancels the old load when going back.
2297
+ // The navigation itself succeeded — just the prior page's load was interrupted.
2298
+ if (navErr.message && navErr.message.includes('NS_BINDING_CANCELLED')) {
2299
+ log('info', 'goBack cancelled old load (expected)', { reqId: req.reqId, tabId });
2300
+ } else {
2301
+ throw navErr;
2302
+ }
2303
+ }
1782
2304
  tabState.refs = await buildRefs(tabState.page);
1783
2305
  return { ok: true, url: tabState.page.url() };
1784
2306
  });
@@ -1906,6 +2428,7 @@ app.get('/tabs/:tabId/downloads', async (req, res) => {
1906
2428
 
1907
2429
  res.json({ tabId: req.params.tabId, downloads });
1908
2430
  } catch (err) {
2431
+ failuresTotal.labels(classifyError(err), 'downloads').inc();
1909
2432
  log('error', 'downloads failed', { reqId: req.reqId, error: err.message });
1910
2433
  res.status(500).json({ error: safeError(err) });
1911
2434
  }
@@ -1931,6 +2454,7 @@ app.get('/tabs/:tabId/images', async (req, res) => {
1931
2454
 
1932
2455
  res.json({ tabId: req.params.tabId, images });
1933
2456
  } catch (err) {
2457
+ failuresTotal.labels(classifyError(err), 'images').inc();
1934
2458
  log('error', 'images failed', { reqId: req.reqId, error: err.message });
1935
2459
  res.status(500).json({ error: safeError(err) });
1936
2460
  }
@@ -1999,6 +2523,7 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
1999
2523
  log('info', 'evaluate', { reqId: req.reqId, tabId: req.params.tabId, userId, resultType: typeof result });
2000
2524
  res.json({ ok: true, result });
2001
2525
  } catch (err) {
2526
+ failuresTotal.labels(classifyError(err), 'evaluate').inc();
2002
2527
  log('error', 'evaluate failed', { reqId: req.reqId, error: err.message });
2003
2528
  res.status(500).json({ error: safeError(err) });
2004
2529
  }
@@ -2007,17 +2532,19 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
2007
2532
  // Close tab
2008
2533
  app.delete('/tabs/:tabId', async (req, res) => {
2009
2534
  try {
2010
- const { userId } = req.body;
2535
+ const userId = req.query.userId || req.body?.userId;
2536
+ if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
2011
2537
  const session = sessions.get(normalizeUserId(userId));
2012
2538
  const found = session && findTab(session, req.params.tabId);
2013
2539
  if (found) {
2014
2540
  await clearTabDownloads(found.tabState);
2015
2541
  await safePageClose(found.tabState.page);
2016
2542
  found.group.delete(req.params.tabId);
2017
- { const _l = tabLocks.get(req.params.tabId); if (_l) _l.drain(); tabLocks.delete(req.params.tabId); }
2543
+ { const _l = tabLocks.get(req.params.tabId); if (_l) _l.drain(); tabLocks.delete(req.params.tabId); refreshTabLockQueueDepth(); }
2018
2544
  if (found.group.size === 0) {
2019
2545
  session.tabGroups.delete(found.listItemId);
2020
2546
  }
2547
+ refreshActiveTabsGauge();
2021
2548
  log('info', 'tab closed', { reqId: req.reqId, tabId: req.params.tabId, userId });
2022
2549
  }
2023
2550
  res.json({ ok: true });
@@ -2030,16 +2557,23 @@ app.delete('/tabs/:tabId', async (req, res) => {
2030
2557
  // Close tab group
2031
2558
  app.delete('/tabs/group/:listItemId', async (req, res) => {
2032
2559
  try {
2033
- const { userId } = req.body;
2560
+ const userId = req.query.userId || req.body?.userId;
2561
+ if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
2034
2562
  const session = sessions.get(normalizeUserId(userId));
2035
2563
  const group = session?.tabGroups.get(req.params.listItemId);
2036
2564
  if (group) {
2037
2565
  for (const [tabId, tabState] of group) {
2038
2566
  await clearTabDownloads(tabState);
2039
2567
  await safePageClose(tabState.page);
2040
- tabLocks.delete(tabId);
2568
+ const lock = tabLocks.get(tabId);
2569
+ if (lock) {
2570
+ lock.drain();
2571
+ tabLocks.delete(tabId);
2572
+ }
2041
2573
  }
2042
2574
  session.tabGroups.delete(req.params.listItemId);
2575
+ refreshTabLockQueueDepth();
2576
+ refreshActiveTabsGauge();
2043
2577
  log('info', 'tab group closed', { reqId: req.reqId, listItemId: req.params.listItemId, userId });
2044
2578
  }
2045
2579
  res.json({ ok: true });
@@ -2058,6 +2592,18 @@ app.delete('/sessions/:userId', async (req, res) => {
2058
2592
  await clearSessionDownloads(session);
2059
2593
  await session.context.close();
2060
2594
  sessions.delete(userId);
2595
+ // Remove any lingering tab locks for the session
2596
+ for (const [listItemId, group] of session.tabGroups) {
2597
+ for (const tabId of group.keys()) {
2598
+ const lock = tabLocks.get(tabId);
2599
+ if (lock) {
2600
+ lock.drain();
2601
+ tabLocks.delete(tabId);
2602
+ }
2603
+ }
2604
+ }
2605
+ refreshTabLockQueueDepth();
2606
+ refreshActiveTabsGauge();
2061
2607
  log('info', 'session closed', { userId });
2062
2608
  }
2063
2609
  if (sessions.size === 0) scheduleBrowserIdleShutdown();
@@ -2073,9 +2619,11 @@ setInterval(() => {
2073
2619
  const now = Date.now();
2074
2620
  for (const [userId, session] of sessions) {
2075
2621
  if (now - session.lastAccess > SESSION_TIMEOUT_MS) {
2622
+ sessionsExpiredTotal.inc();
2076
2623
  clearSessionDownloads(session).catch(() => {});
2077
2624
  session.context.close().catch(() => {});
2078
2625
  sessions.delete(userId);
2626
+ refreshActiveTabsGauge();
2079
2627
  log('info', 'session expired', { userId });
2080
2628
  }
2081
2629
  }
@@ -2083,6 +2631,7 @@ setInterval(() => {
2083
2631
  if (sessions.size === 0) {
2084
2632
  scheduleBrowserIdleShutdown();
2085
2633
  }
2634
+ refreshTabLockQueueDepth();
2086
2635
  }, 60_000);
2087
2636
 
2088
2637
  // Per-tab inactivity reaper — close tabs idle for TAB_INACTIVITY_MS
@@ -2099,10 +2648,13 @@ setInterval(() => {
2099
2648
  if (tabState.toolCalls === tabState._lastReaperToolCalls) {
2100
2649
  const idleMs = now - tabState._lastReaperCheck;
2101
2650
  if (idleMs >= TAB_INACTIVITY_MS) {
2651
+ tabsReapedTotal.inc();
2102
2652
  log('info', 'tab reaped (inactive)', { userId, tabId, listItemId, idleMs, toolCalls: tabState.toolCalls });
2103
2653
  safePageClose(tabState.page);
2104
2654
  group.delete(tabId);
2105
2655
  { const _l = tabLocks.get(tabId); if (_l) _l.drain(); tabLocks.delete(tabId); }
2656
+ refreshTabLockQueueDepth();
2657
+ refreshActiveTabsGauge();
2106
2658
  }
2107
2659
  } else {
2108
2660
  tabState._lastReaperCheck = now;
@@ -2180,26 +2732,26 @@ app.post('/tabs/open', async (req, res) => {
2180
2732
 
2181
2733
  const session = await getSession(userId);
2182
2734
 
2183
- // Check global tab limit first
2184
- if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
2185
- return res.status(429).json({ error: 'Maximum global tabs reached' });
2186
- }
2187
-
2735
+ // Recycle oldest tab when limits are reached instead of rejecting
2188
2736
  let totalTabs = 0;
2189
2737
  for (const g of session.tabGroups.values()) totalTabs += g.size;
2190
- if (totalTabs >= MAX_TABS_PER_SESSION) {
2191
- return res.status(429).json({ error: 'Maximum tabs per session reached' });
2738
+ if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
2739
+ const recycled = await recycleOldestTab(session, req.reqId);
2740
+ if (!recycled) {
2741
+ return res.status(429).json({ error: 'Maximum tabs per session reached' });
2742
+ }
2192
2743
  }
2193
2744
 
2194
2745
  const group = getTabGroup(session, listItemId);
2195
2746
 
2196
2747
  const page = await session.context.newPage();
2197
- const tabId = crypto.randomUUID();
2748
+ const tabId = fly.makeTabId();
2198
2749
  const tabState = createTabState(page);
2199
2750
  attachDownloadListener(tabState, tabId, log);
2200
2751
  group.set(tabId, tabState);
2752
+ refreshActiveTabsGauge();
2201
2753
 
2202
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
2754
+ await withPageLoadDuration('open_url', () => page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
2203
2755
  tabState.visitedUrls.add(url);
2204
2756
 
2205
2757
  log('info', 'openclaw tab opened', { reqId: req.reqId, tabId, url: page.url() });
@@ -2222,6 +2774,7 @@ app.post('/start', async (req, res) => {
2222
2774
  await ensureBrowser();
2223
2775
  res.json({ ok: true, profile: 'camoufox' });
2224
2776
  } catch (err) {
2777
+ failuresTotal.labels('browser_launch', 'start').inc();
2225
2778
  res.status(500).json({ ok: false, error: safeError(err) });
2226
2779
  }
2227
2780
  });
@@ -2242,7 +2795,21 @@ app.post('/stop', async (req, res) => {
2242
2795
  cleanupTasks.push(clearSessionDownloads(session));
2243
2796
  }
2244
2797
  await Promise.all(cleanupTasks);
2798
+ for (const session of sessions.values()) {
2799
+ for (const [, group] of session.tabGroups) {
2800
+ for (const tabId of group.keys()) {
2801
+ const lock = tabLocks.get(tabId);
2802
+ if (lock) {
2803
+ lock.drain();
2804
+ tabLocks.delete(tabId);
2805
+ }
2806
+ }
2807
+ }
2808
+ }
2809
+ tabLocks.clear();
2245
2810
  sessions.clear();
2811
+ refreshActiveTabsGauge();
2812
+ refreshTabLockQueueDepth();
2246
2813
  res.json({ ok: true, stopped: true, profile: 'camoufox' });
2247
2814
  } catch (err) {
2248
2815
  res.status(500).json({ ok: false, error: safeError(err) });
@@ -2273,7 +2840,7 @@ app.post('/navigate', async (req, res) => {
2273
2840
  tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
2274
2841
 
2275
2842
  const result = await withTabLock(targetId, async () => {
2276
- await tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
2843
+ await withPageLoadDuration('navigate', () => tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
2277
2844
  tabState.visitedUrls.add(url);
2278
2845
  tabState.lastSnapshot = null;
2279
2846
 
@@ -2510,8 +3077,9 @@ app.post('/act', async (req, res) => {
2510
3077
  if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
2511
3078
  await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
2512
3079
  } else {
2513
- const delta = direction === 'up' ? -amount : amount;
2514
- await tabState.page.mouse.wheel(0, delta);
3080
+ const isVertical = direction === 'up' || direction === 'down';
3081
+ const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
3082
+ await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
2515
3083
  }
2516
3084
  await tabState.page.waitForTimeout(300);
2517
3085
  return { ok: true, targetId };
@@ -2611,6 +3179,7 @@ setInterval(async () => {
2611
3179
  await testContext.close();
2612
3180
  healthState.lastSuccessfulNav = Date.now();
2613
3181
  } catch (err) {
3182
+ failuresTotal.labels('health_probe', 'internal').inc();
2614
3183
  log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
2615
3184
  if (testContext) await testContext.close().catch(() => {});
2616
3185
  restartBrowser('health probe failed').catch(() => {});
@@ -2641,6 +3210,7 @@ async function gracefulShutdown(signal) {
2641
3210
  forceTimeout.unref();
2642
3211
 
2643
3212
  server.close();
3213
+ stopMemoryReporter();
2644
3214
 
2645
3215
  for (const [userId, session] of sessions) {
2646
3216
  await session.context.close().catch(() => {});
@@ -2652,17 +3222,32 @@ async function gracefulShutdown(signal) {
2652
3222
  process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
2653
3223
  process.on('SIGINT', () => gracefulShutdown('SIGINT'));
2654
3224
 
3225
+ // Idle self-shutdown REMOVED — it was racing with min_machines_running=2
3226
+ // and stopping machines that Fly couldn't auto-restart fast enough, leaving
3227
+ // only 1 machine to handle all browser traffic (causing timeouts for users).
3228
+ // Fly's auto_stop_machines=false + min_machines_running=2 handles scaling.
3229
+
2655
3230
  const PORT = CONFIG.port;
2656
3231
  const server = app.listen(PORT, async () => {
2657
- log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
3232
+ startMemoryReporter();
3233
+ refreshActiveTabsGauge();
3234
+ refreshTabLockQueueDepth();
3235
+ if (FLY_MACHINE_ID) {
3236
+ log('info', 'server started (fly)', { port: PORT, pid: process.pid, machineId: FLY_MACHINE_ID, nodeVersion: process.version });
3237
+ } else {
3238
+ log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
3239
+ }
2658
3240
  // Pre-warm browser so first request doesn't eat a 6-7s cold start
2659
3241
  try {
2660
3242
  const start = Date.now();
2661
3243
  await ensureBrowser();
2662
3244
  log('info', 'browser pre-warmed', { ms: Date.now() - start });
3245
+ scheduleBrowserIdleShutdown();
2663
3246
  } catch (err) {
2664
- log('error', 'browser pre-warm failed (will retry on first request)', { error: err.message });
3247
+ log('error', 'browser pre-warm failed (will retry in background)', { error: err.message });
3248
+ scheduleBrowserWarmRetry();
2665
3249
  }
3250
+ // Idle self-shutdown removed — Fly manages machine lifecycle via fly.toml.
2666
3251
  });
2667
3252
 
2668
3253
  server.on('error', (err) => {