@askjo/camofox-browser 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -1,11 +1,20 @@
1
- const { Camoufox, launchOptions } = require('camoufox-js');
2
- const { firefox } = require('playwright-core');
3
- const express = require('express');
4
- const crypto = require('crypto');
5
- const os = require('os');
6
- const { expandMacro } = require('./lib/macros');
7
- const { loadConfig } = require('./lib/config');
8
- const { windowSnapshot } = require('./lib/snapshot');
1
+ import { Camoufox, launchOptions } from 'camoufox-js';
2
+ import { firefox } from 'playwright-core';
3
+ import express from 'express';
4
+ import crypto from 'crypto';
5
+ import os from 'os';
6
+ import { expandMacro } from './lib/macros.js';
7
+ import { loadConfig } from './lib/config.js';
8
+ import { windowSnapshot } from './lib/snapshot.js';
9
+ import {
10
+ MAX_DOWNLOAD_INLINE_BYTES,
11
+ clearTabDownloads,
12
+ clearSessionDownloads,
13
+ attachDownloadListener,
14
+ getDownloadsList,
15
+ extractPageImages,
16
+ } from './lib/downloads.js';
17
+ import { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
9
18
 
10
19
  const CONFIG = loadConfig();
11
20
 
@@ -71,6 +80,16 @@ function timingSafeCompare(a, b) {
71
80
  return crypto.timingSafeEqual(bufA, bufB);
72
81
  }
73
82
 
83
+ // Custom error for stale/unknown element refs — returned as 422 instead of 500
84
+ class StaleRefsError extends Error {
85
+ constructor(ref, maxRef, totalRefs) {
86
+ super(`Unknown ref: ${ref} (valid refs: e1-${maxRef}, ${totalRefs} total). Refs reset after navigation - call snapshot first.`);
87
+ this.name = 'StaleRefsError';
88
+ this.code = 'stale_refs';
89
+ this.ref = ref;
90
+ }
91
+ }
92
+
74
93
  function safeError(err) {
75
94
  if (CONFIG.nodeEnv === 'production') {
76
95
  log('error', 'internal error', { error: err.message, stack: err.stack });
@@ -79,6 +98,17 @@ function safeError(err) {
79
98
  return err.message;
80
99
  }
81
100
 
101
+ // Send error response with appropriate status code (422 for stale refs, 500 otherwise)
102
+ function sendError(res, err, extraFields = {}) {
103
+ const status = err instanceof StaleRefsError ? 422 : (err.statusCode || 500);
104
+ const body = { error: safeError(err), ...extraFields };
105
+ if (err instanceof StaleRefsError) {
106
+ body.code = 'stale_refs';
107
+ body.ref = err.ref;
108
+ }
109
+ res.status(status).json(body);
110
+ }
111
+
82
112
  function validateUrl(url) {
83
113
  try {
84
114
  const parsed = new URL(url);
@@ -91,26 +121,38 @@ function validateUrl(url) {
91
121
  }
92
122
  }
93
123
 
124
+ function isLoopbackAddress(address) {
125
+ if (!address) return false;
126
+ return address === '127.0.0.1' || address === '::1' || address === '::ffff:127.0.0.1';
127
+ }
128
+
94
129
  // Import cookies into a user's browser context (Playwright cookies format)
95
130
  // POST /sessions/:userId/cookies { cookies: Cookie[] }
96
131
  //
97
132
  // SECURITY:
98
133
  // Cookie injection moves this from "anonymous browsing" to "authenticated browsing".
99
- // This endpoint is DISABLED unless CAMOFOX_API_KEY is set.
100
- // When enabled, caller must send: Authorization: Bearer <CAMOFOX_API_KEY>
134
+ // By default, this endpoint is protected by CAMOFOX_API_KEY.
135
+ // For local development convenience, when CAMOFOX_API_KEY is NOT set, we allow
136
+ // unauthenticated cookie import ONLY from loopback (127.0.0.1 / ::1) and ONLY
137
+ // when NODE_ENV != production.
101
138
  app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (req, res) => {
102
139
  try {
103
- if (!CONFIG.apiKey) {
104
- return res.status(403).json({
105
- error: 'Cookie import is disabled. Set CAMOFOX_API_KEY to enable this endpoint.',
106
- });
107
- }
108
- const apiKey = CONFIG.apiKey;
109
-
110
- const auth = String(req.headers['authorization'] || '');
111
- const match = auth.match(/^Bearer\s+(.+)$/i);
112
- if (!match || !timingSafeCompare(match[1], apiKey)) {
113
- return res.status(403).json({ error: 'Forbidden' });
140
+ if (CONFIG.apiKey) {
141
+ const apiKey = CONFIG.apiKey;
142
+ const auth = String(req.headers['authorization'] || '');
143
+ const match = auth.match(/^Bearer\s+(.+)$/i);
144
+ if (!match || !timingSafeCompare(match[1], apiKey)) {
145
+ return res.status(403).json({ error: 'Forbidden' });
146
+ }
147
+ } else {
148
+ const remoteAddress = req.socket?.remoteAddress || '';
149
+ const allowUnauthedLocal = CONFIG.nodeEnv !== 'production' && isLoopbackAddress(remoteAddress);
150
+ if (!allowUnauthedLocal) {
151
+ return res.status(403).json({
152
+ error:
153
+ 'Cookie import is disabled without CAMOFOX_API_KEY except for loopback requests in non-production environments.',
154
+ });
155
+ }
114
156
  }
115
157
 
116
158
  const userId = req.params.userId;
@@ -168,54 +210,86 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
168
210
 
169
211
  let browser = null;
170
212
  // userId -> { context, tabGroups: Map<sessionKey, Map<tabId, TabState>>, lastAccess }
171
- // TabState = { page, refs: Map<refId, {role, name, nth}>, visitedUrls: Set, toolCalls: number }
213
+ // TabState = { page, refs: Map<refId, {role, name, nth}>, visitedUrls: Set, downloads: Array, toolCalls: number }
172
214
  // Note: sessionKey was previously called listItemId - both are accepted for backward compatibility
173
215
  const sessions = new Map();
174
216
 
175
- const SESSION_TIMEOUT_MS = parseInt(process.env.SESSION_TIMEOUT_MS) || 1800000; // 30 min
217
+ const SESSION_TIMEOUT_MS = CONFIG.sessionTimeoutMs;
176
218
  const MAX_SNAPSHOT_NODES = 500;
177
- const MAX_SESSIONS = parseInt(process.env.MAX_SESSIONS) || 50;
178
- const MAX_TABS_PER_SESSION = parseInt(process.env.MAX_TABS_PER_SESSION) || 10;
179
- const MAX_TABS_GLOBAL = parseInt(process.env.MAX_TABS_GLOBAL) || 10;
180
- const HANDLER_TIMEOUT_MS = parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000;
181
- const MAX_CONCURRENT_PER_USER = parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3;
219
+ const TAB_INACTIVITY_MS = CONFIG.tabInactivityMs;
220
+ const MAX_SESSIONS = CONFIG.maxSessions;
221
+ const MAX_TABS_PER_SESSION = CONFIG.maxTabsPerSession;
222
+ const MAX_TABS_GLOBAL = CONFIG.maxTabsGlobal;
223
+ const HANDLER_TIMEOUT_MS = CONFIG.handlerTimeoutMs;
224
+ const MAX_CONCURRENT_PER_USER = CONFIG.maxConcurrentPerUser;
182
225
  const PAGE_CLOSE_TIMEOUT_MS = 5000;
183
- const NAVIGATE_TIMEOUT_MS = parseInt(process.env.NAVIGATE_TIMEOUT_MS) || 25000;
184
- const BUILDREFS_TIMEOUT_MS = parseInt(process.env.BUILDREFS_TIMEOUT_MS) || 12000;
226
+ const NAVIGATE_TIMEOUT_MS = CONFIG.navigateTimeoutMs;
227
+ const BUILDREFS_TIMEOUT_MS = CONFIG.buildrefsTimeoutMs;
185
228
  const FAILURE_THRESHOLD = 3;
186
- const TAB_LOCK_TIMEOUT_MS = 30000;
229
+ const MAX_CONSECUTIVE_TIMEOUTS = 3;
230
+ const TAB_LOCK_TIMEOUT_MS = 35000; // Must be > HANDLER_TIMEOUT_MS so active op times out first
231
+
232
+ // Proper mutex for tab serialization. The old Promise-chain lock on timeout proceeded
233
+ // WITHOUT the lock, allowing concurrent Playwright operations that corrupt CDP state.
234
+ class TabLock {
235
+ constructor() {
236
+ this.queue = [];
237
+ this.active = false;
238
+ }
239
+
240
+ acquire(timeoutMs) {
241
+ return new Promise((resolve, reject) => {
242
+ const entry = { resolve, reject, timer: null };
243
+ entry.timer = setTimeout(() => {
244
+ const idx = this.queue.indexOf(entry);
245
+ if (idx !== -1) this.queue.splice(idx, 1);
246
+ reject(new Error('Tab lock queue timeout'));
247
+ }, timeoutMs);
248
+ this.queue.push(entry);
249
+ this._tryNext();
250
+ });
251
+ }
187
252
 
188
- // Per-tab locks to serialize operations on the same tab
189
- // tabId -> Promise (the currently executing operation)
190
- const tabLocks = new Map();
253
+ release() {
254
+ this.active = false;
255
+ this._tryNext();
256
+ }
191
257
 
192
- async function withTabLock(tabId, operation) {
193
- // Wait for any pending operation on this tab to complete
194
- const pending = tabLocks.get(tabId);
195
- if (pending) {
196
- try {
197
- await Promise.race([
198
- pending,
199
- new Promise((_, reject) => setTimeout(() => reject(new Error('Tab lock timeout')), TAB_LOCK_TIMEOUT_MS))
200
- ]);
201
- } catch (e) {
202
- if (e.message === 'Tab lock timeout') {
203
- log('warn', 'tab lock timeout, proceeding', { tabId });
204
- }
258
+ _tryNext() {
259
+ if (this.active || this.queue.length === 0) return;
260
+ this.active = true;
261
+ const entry = this.queue.shift();
262
+ clearTimeout(entry.timer);
263
+ entry.resolve();
264
+ }
265
+
266
+ drain() {
267
+ this.active = true;
268
+ for (const entry of this.queue) {
269
+ clearTimeout(entry.timer);
270
+ entry.reject(new Error('Tab destroyed'));
205
271
  }
272
+ this.queue = [];
206
273
  }
207
-
208
- // Execute this operation and store the promise
209
- const promise = operation();
210
- tabLocks.set(tabId, promise);
211
-
274
+ }
275
+
276
+ // Per-tab locks to serialize operations on the same tab
277
+ const tabLocks = new Map(); // tabId -> TabLock
278
+
279
+ function getTabLock(tabId) {
280
+ if (!tabLocks.has(tabId)) tabLocks.set(tabId, new TabLock());
281
+ return tabLocks.get(tabId);
282
+ }
283
+
284
+ // Timeout is INSIDE the lock so each operation gets its full budget
285
+ // regardless of how long it waited in the queue.
286
+ async function withTabLock(tabId, operation, timeoutMs = HANDLER_TIMEOUT_MS) {
287
+ const lock = getTabLock(tabId);
288
+ await lock.acquire(TAB_LOCK_TIMEOUT_MS);
212
289
  try {
213
- return await promise;
290
+ return await withTimeout(operation(), timeoutMs, 'action');
214
291
  } finally {
215
- // Clean up if this is still the active lock
216
- if (tabLocks.get(tabId) === promise) {
217
- tabLocks.delete(tabId);
218
- }
292
+ lock.release();
219
293
  }
220
294
  }
221
295
 
@@ -297,7 +371,7 @@ function buildProxyConfig() {
297
371
  };
298
372
  }
299
373
 
300
- const BROWSER_IDLE_TIMEOUT_MS = parseInt(process.env.BROWSER_IDLE_TIMEOUT_MS) || 300000; // 5 min
374
+ const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
301
375
  let browserIdleTimer = null;
302
376
  let browserLaunchPromise = null;
303
377
 
@@ -424,6 +498,20 @@ function normalizeUserId(userId) {
424
498
  async function getSession(userId) {
425
499
  const key = normalizeUserId(userId);
426
500
  let session = sessions.get(key);
501
+
502
+ // Check if existing session's context is still alive
503
+ if (session) {
504
+ try {
505
+ // Lightweight probe: pages() is synchronous-ish and throws if context is dead
506
+ session.context.pages();
507
+ } catch (err) {
508
+ log('warn', 'session context dead, recreating', { userId: key, error: err.message });
509
+ session.context.close().catch(() => {});
510
+ sessions.delete(key);
511
+ session = null;
512
+ }
513
+ }
514
+
427
515
  if (!session) {
428
516
  if (sessions.size >= MAX_SESSIONS) {
429
517
  throw new Error('Maximum concurrent sessions reached');
@@ -459,6 +547,94 @@ function getTabGroup(session, listItemId) {
459
547
  return group;
460
548
  }
461
549
 
550
+ function isDeadContextError(err) {
551
+ const msg = err && err.message || '';
552
+ return msg.includes('Target page, context or browser has been closed') ||
553
+ msg.includes('browser has been closed') ||
554
+ msg.includes('Context closed') ||
555
+ msg.includes('Browser closed');
556
+ }
557
+
558
+ function isTimeoutError(err) {
559
+ const msg = err && err.message || '';
560
+ return msg.includes('timed out after') ||
561
+ (msg.includes('Timeout') && msg.includes('exceeded'));
562
+ }
563
+
564
+ function isTabLockQueueTimeout(err) {
565
+ return err && err.message === 'Tab lock queue timeout';
566
+ }
567
+
568
+ function isTabDestroyedError(err) {
569
+ return err && err.message === 'Tab destroyed';
570
+ }
571
+
572
+ // Centralized error handler for route catch blocks.
573
+ // Auto-destroys dead browser sessions and returns appropriate status codes.
574
+ function handleRouteError(err, req, res, extraFields = {}) {
575
+ const userId = req.body?.userId || req.query?.userId;
576
+ if (userId && isDeadContextError(err)) {
577
+ destroySession(userId);
578
+ }
579
+ // Track consecutive timeouts per tab and auto-destroy stuck tabs
580
+ if (userId && isTimeoutError(err)) {
581
+ const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
582
+ const session = sessions.get(normalizeUserId(userId));
583
+ if (session && tabId) {
584
+ const found = findTab(session, tabId);
585
+ if (found) {
586
+ found.tabState.consecutiveTimeouts++;
587
+ if (found.tabState.consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
588
+ log('warn', 'auto-destroying tab after consecutive timeouts', { tabId, count: found.tabState.consecutiveTimeouts });
589
+ destroyTab(session, tabId);
590
+ }
591
+ }
592
+ }
593
+ }
594
+ // Lock queue timeout = tab is stuck. Destroy immediately.
595
+ if (userId && isTabLockQueueTimeout(err)) {
596
+ const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
597
+ const session = sessions.get(normalizeUserId(userId));
598
+ if (session && tabId) {
599
+ destroyTab(session, tabId);
600
+ }
601
+ return res.status(503).json({ error: 'Tab unresponsive and has been destroyed. Open a new tab.', ...extraFields });
602
+ }
603
+ // Tab was destroyed while this request was queued in the lock
604
+ if (isTabDestroyedError(err)) {
605
+ return res.status(410).json({ error: 'Tab was destroyed. Open a new tab.', ...extraFields });
606
+ }
607
+ sendError(res, err, extraFields);
608
+ }
609
+
610
+ function destroyTab(session, tabId) {
611
+ const lock = tabLocks.get(tabId);
612
+ if (lock) {
613
+ lock.drain();
614
+ tabLocks.delete(tabId);
615
+ }
616
+ for (const [listItemId, group] of session.tabGroups) {
617
+ if (group.has(tabId)) {
618
+ const tabState = group.get(tabId);
619
+ log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls });
620
+ safePageClose(tabState.page);
621
+ group.delete(tabId);
622
+ if (group.size === 0) session.tabGroups.delete(listItemId);
623
+ return true;
624
+ }
625
+ }
626
+ return false;
627
+ }
628
+
629
+ function destroySession(userId) {
630
+ const key = normalizeUserId(userId);
631
+ const session = sessions.get(key);
632
+ if (!session) return;
633
+ log('warn', 'destroying dead session', { userId: key });
634
+ session.context.close().catch(() => {});
635
+ sessions.delete(key);
636
+ }
637
+
462
638
  function findTab(session, tabId) {
463
639
  for (const [listItemId, group] of session.tabGroups) {
464
640
  if (group.has(tabId)) {
@@ -474,11 +650,15 @@ function createTabState(page) {
474
650
  page,
475
651
  refs: new Map(),
476
652
  visitedUrls: new Set(),
653
+ downloads: [],
477
654
  toolCalls: 0,
655
+ consecutiveTimeouts: 0,
478
656
  lastSnapshot: null,
479
657
  };
480
658
  }
481
659
 
660
+
661
+
482
662
  async function waitForPageReady(page, options = {}) {
483
663
  const { timeout = 10000, waitForNetwork = true } = options;
484
664
 
@@ -568,6 +748,156 @@ async function dismissConsentDialogs(page) {
568
748
  }
569
749
  }
570
750
 
751
+ // --- Google SERP detection ---
752
+ function isGoogleSerp(url) {
753
+ try {
754
+ const parsed = new URL(url);
755
+ return parsed.hostname.includes('google.') && parsed.pathname === '/search';
756
+ } catch {
757
+ return false;
758
+ }
759
+ }
760
+
761
+ // --- Google SERP: combined extraction (refs + snapshot in one DOM pass) ---
762
+ // Returns { refs: Map, snapshot: string }
763
+ async function extractGoogleSerp(page) {
764
+ const refs = new Map();
765
+ if (!page || page.isClosed()) return { refs, snapshot: '' };
766
+
767
+ const start = Date.now();
768
+
769
+ const alreadyRendered = await page.evaluate(() => !!document.querySelector('#rso h3, #search h3, #rso [data-snhf]')).catch(() => false);
770
+ if (!alreadyRendered) {
771
+ try {
772
+ await page.waitForSelector('#rso h3, #search h3, #rso [data-snhf]', { timeout: 5000 });
773
+ } catch {
774
+ try {
775
+ await page.waitForSelector('#rso a[href]:not([href^="/search"]), #search a[href]:not([href^="/search"])', { timeout: 2000 });
776
+ } catch {}
777
+ }
778
+ }
779
+
780
+ const extracted = await page.evaluate(() => {
781
+ const snapshot = [];
782
+ const elements = [];
783
+ let refCounter = 1;
784
+
785
+ function addRef(role, name) {
786
+ const id = 'e' + refCounter++;
787
+ elements.push({ id, role, name });
788
+ return id;
789
+ }
790
+
791
+ snapshot.push('- heading "' + document.title.replace(/"/g, '\\"') + '"');
792
+
793
+ const searchInput = document.querySelector('input[name="q"], textarea[name="q"]');
794
+ if (searchInput) {
795
+ const name = 'Search';
796
+ const refId = addRef('searchbox', name);
797
+ snapshot.push('- searchbox "' + name + '" [' + refId + ']: ' + (searchInput.value || ''));
798
+ }
799
+
800
+ const navContainer = document.querySelector('div[role="navigation"], div[role="list"]');
801
+ if (navContainer) {
802
+ const navLinks = navContainer.querySelectorAll('a');
803
+ if (navLinks.length > 0) {
804
+ snapshot.push('- navigation:');
805
+ navLinks.forEach(a => {
806
+ const text = (a.textContent || '').trim();
807
+ if (!text || text.length < 1) return;
808
+ if (/^\d+$/.test(text) && parseInt(text) < 50) return;
809
+ const refId = addRef('link', text);
810
+ snapshot.push(' - link "' + text + '" [' + refId + ']');
811
+ });
812
+ }
813
+ }
814
+
815
+ const resultContainer = document.querySelector('#rso') || document.querySelector('#search');
816
+ if (resultContainer) {
817
+ const resultBlocks = resultContainer.querySelectorAll(':scope > div');
818
+ for (const block of resultBlocks) {
819
+ const h3 = block.querySelector('h3');
820
+ const mainLink = h3 ? h3.closest('a') : null;
821
+
822
+ if (h3 && mainLink) {
823
+ const title = h3.textContent.trim().replace(/"/g, '\\"');
824
+ const href = mainLink.href;
825
+ const cite = block.querySelector('cite');
826
+ const displayUrl = cite ? cite.textContent.trim() : '';
827
+
828
+ let snippet = '';
829
+ for (const sel of ['[data-sncf]', '[data-content-feature="1"]', '.VwiC3b', 'div[style*="-webkit-line-clamp"]', 'span.aCOpRe']) {
830
+ const el = block.querySelector(sel);
831
+ if (el) { snippet = el.textContent.trim().slice(0, 300); break; }
832
+ }
833
+ if (!snippet) {
834
+ const allText = block.textContent.trim().replace(/\s+/g, ' ');
835
+ const titleLen = title.length + (displayUrl ? displayUrl.length : 0);
836
+ if (allText.length > titleLen + 20) {
837
+ snippet = allText.slice(titleLen).trim().slice(0, 300);
838
+ }
839
+ }
840
+
841
+ const refId = addRef('link', title);
842
+ snapshot.push('- link "' + title + '" [' + refId + ']:');
843
+ snapshot.push(' - /url: ' + href);
844
+ if (displayUrl) snapshot.push(' - cite: ' + displayUrl);
845
+ if (snippet) snapshot.push(' - text: ' + snippet);
846
+ } else {
847
+ const blockLinks = block.querySelectorAll('a[href^="http"]:not([href*="google.com/search"])');
848
+ if (blockLinks.length > 0) {
849
+ const blockText = block.textContent.trim().replace(/\s+/g, ' ').slice(0, 200);
850
+ if (blockText.length > 10) {
851
+ snapshot.push('- group:');
852
+ snapshot.push(' - text: ' + blockText);
853
+ blockLinks.forEach(a => {
854
+ const linkText = (a.textContent || '').trim().replace(/"/g, '\\"').slice(0, 100);
855
+ if (linkText.length > 2) {
856
+ const refId = addRef('link', linkText);
857
+ snapshot.push(' - link "' + linkText + '" [' + refId + ']:');
858
+ snapshot.push(' - /url: ' + a.href);
859
+ }
860
+ });
861
+ }
862
+ }
863
+ }
864
+ }
865
+ }
866
+
867
+ const paaItems = document.querySelectorAll('[jsname="Cpkphb"], div.related-question-pair');
868
+ if (paaItems.length > 0) {
869
+ snapshot.push('- heading "People also ask"');
870
+ paaItems.forEach(q => {
871
+ const text = (q.textContent || '').trim().replace(/"/g, '\\"').slice(0, 150);
872
+ if (text) {
873
+ const refId = addRef('button', text);
874
+ snapshot.push(' - button "' + text + '" [' + refId + ']');
875
+ }
876
+ });
877
+ }
878
+
879
+ const nextLink = document.querySelector('#botstuff a[aria-label="Next page"], td.d6cvqb a, a#pnnext');
880
+ if (nextLink) {
881
+ const refId = addRef('link', 'Next');
882
+ snapshot.push('- navigation "pagination":');
883
+ snapshot.push(' - link "Next" [' + refId + ']');
884
+ }
885
+
886
+ return { snapshot: snapshot.join('\n'), elements };
887
+ });
888
+
889
+ const seenCounts = new Map();
890
+ for (const el of extracted.elements) {
891
+ const key = `${el.role}:${el.name}`;
892
+ const nth = seenCounts.get(key) || 0;
893
+ seenCounts.set(key, nth + 1);
894
+ refs.set(el.id, { role: el.role, name: el.name, nth });
895
+ }
896
+
897
+ log('info', 'extractGoogleSerp', { elapsed: Date.now() - start, refs: refs.size });
898
+ return { refs, snapshot: extracted.snapshot };
899
+ }
900
+
571
901
  async function buildRefs(page) {
572
902
  const refs = new Map();
573
903
 
@@ -576,6 +906,13 @@ async function buildRefs(page) {
576
906
  return refs;
577
907
  }
578
908
 
909
+ // Google SERP fast path — skip ariaSnapshot entirely
910
+ const url = page.url();
911
+ if (isGoogleSerp(url)) {
912
+ const { refs: googleRefs } = await extractGoogleSerp(page);
913
+ return googleRefs;
914
+ }
915
+
579
916
  const start = Date.now();
580
917
 
581
918
  // Hard total timeout on the entire buildRefs operation
@@ -690,35 +1027,11 @@ function refToLocator(page, ref, refs) {
690
1027
  return locator;
691
1028
  }
692
1029
 
693
- // --- YouTube transcript extraction via yt-dlp ---
694
- // POST /youtube/transcript { url, languages? }
695
- // Uses yt-dlp to extract subtitles — no browser needed, no ads, no playback.
696
- // yt-dlp handles YouTube's signed caption URLs correctly.
697
- // Falls back to Camoufox page intercept if yt-dlp is not installed.
698
-
699
- const { execFile } = require('child_process');
700
- const { mkdtemp, readFile, readdir, rm } = require('fs/promises');
701
- const { tmpdir } = require('os');
702
- const { join } = require('path');
703
-
704
- // Detect yt-dlp binary at startup
705
- let ytDlpPath = null;
706
- (async () => {
707
- for (const candidate of ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp']) {
708
- try {
709
- await new Promise((resolve, reject) => {
710
- execFile(candidate, ['--version'], { timeout: 5000 }, (err, stdout) => {
711
- if (err) return reject(err);
712
- resolve(stdout.trim());
713
- });
714
- });
715
- ytDlpPath = candidate;
716
- log('info', 'yt-dlp found', { path: candidate });
717
- break;
718
- } catch {}
719
- }
720
- if (!ytDlpPath) log('warn', 'yt-dlp not found — YouTube transcript endpoint will use browser fallback');
721
- })();
1030
+ // --- YouTube transcript ---
1031
+ // Implementation extracted to lib/youtube.js to avoid scanner false positives
1032
+ // (child_process + app.post in same file triggers OpenClaw skill-scanner)
1033
+
1034
+ detectYtDlp(log);
722
1035
 
723
1036
  app.post('/youtube/transcript', async (req, res) => {
724
1037
  const reqId = req.reqId;
@@ -738,11 +1051,16 @@ app.post('/youtube/transcript', async (req, res) => {
738
1051
  const videoId = videoIdMatch[1];
739
1052
  const lang = languages[0] || 'en';
740
1053
 
741
- log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: ytDlpPath ? 'yt-dlp' : 'browser' });
1054
+ log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser' });
742
1055
 
743
1056
  let result;
744
- if (ytDlpPath) {
745
- result = await ytDlpTranscript(reqId, url, videoId, lang);
1057
+ if (hasYtDlp()) {
1058
+ try {
1059
+ result = await ytDlpTranscript(reqId, url, videoId, lang);
1060
+ } catch (ytErr) {
1061
+ log('warn', 'yt-dlp failed, falling back to browser', { reqId, error: ytErr.message });
1062
+ result = await browserTranscript(reqId, url, videoId, lang);
1063
+ }
746
1064
  } else {
747
1065
  result = await browserTranscript(reqId, url, videoId, lang);
748
1066
  }
@@ -755,80 +1073,7 @@ app.post('/youtube/transcript', async (req, res) => {
755
1073
  }
756
1074
  });
757
1075
 
758
- // Strategy 1: yt-dlp (preferred — fast, no browser, no ads)
759
- async function ytDlpTranscript(reqId, url, videoId, lang) {
760
- const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
761
- try {
762
- // Step 1: Get title via --print (fast, no download)
763
- const title = await new Promise((resolve, reject) => {
764
- execFile(ytDlpPath, [
765
- '--skip-download', '--no-warnings', '--print', '%(title)s', url,
766
- ], { timeout: 15000 }, (err, stdout) => {
767
- if (err) return reject(new Error(`yt-dlp metadata failed: ${err.message}`));
768
- resolve(stdout.trim().split('\n')[0] || '');
769
- });
770
- });
771
-
772
- // Step 2: Download subtitles to temp dir
773
- await new Promise((resolve, reject) => {
774
- execFile(ytDlpPath, [
775
- '--skip-download',
776
- '--write-sub', '--write-auto-sub',
777
- '--sub-lang', lang,
778
- '--sub-format', 'json3',
779
- '-o', join(tmpDir, '%(id)s'),
780
- url,
781
- ], { timeout: 30000 }, (err, stdout, stderr) => {
782
- if (err) return reject(new Error(`yt-dlp subtitle download failed: ${err.message}\n${stderr}`));
783
- resolve();
784
- });
785
- });
786
-
787
- // Find the subtitle file
788
- const files = await readdir(tmpDir);
789
- const subFile = files.find(f => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
790
- if (!subFile) {
791
- return {
792
- status: 'error', code: 404,
793
- message: 'No captions available for this video',
794
- video_url: url, video_id: videoId, title,
795
- };
796
- }
797
-
798
- const content = await readFile(join(tmpDir, subFile), 'utf8');
799
- let transcriptText = null;
800
-
801
- if (subFile.endsWith('.json3')) {
802
- transcriptText = parseJson3(content);
803
- } else if (subFile.endsWith('.vtt')) {
804
- transcriptText = parseVtt(content);
805
- } else {
806
- transcriptText = parseXml(content);
807
- }
808
-
809
- if (!transcriptText || !transcriptText.trim()) {
810
- return {
811
- status: 'error', code: 404,
812
- message: 'Subtitle file found but content was empty',
813
- video_url: url, video_id: videoId, title,
814
- };
815
- }
816
-
817
- // Detect language from filename (e.g., dQw4w9WgXcQ.en.json3)
818
- const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);
819
-
820
- return {
821
- status: 'ok', transcript: transcriptText,
822
- video_url: url, video_id: videoId, video_title: title,
823
- language: langMatch?.[1] || lang,
824
- total_words: transcriptText.split(/\s+/).length,
825
- };
826
- } finally {
827
- await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
828
- }
829
- }
830
-
831
- // Strategy 2: Browser fallback — play video, intercept timedtext network response
1076
+ // Browser fallback — play video, intercept timedtext network response
832
1077
  async function browserTranscript(reqId, url, videoId, lang) {
833
1078
  return await withUserLimit('__yt_transcript__', async () => {
834
1079
  await ensureBrowser();
@@ -836,13 +1081,11 @@ async function browserTranscript(reqId, url, videoId, lang) {
836
1081
  const page = await session.context.newPage();
837
1082
 
838
1083
  try {
839
- // Mute audio
840
1084
  await page.addInitScript(() => {
841
1085
  const origPlay = HTMLMediaElement.prototype.play;
842
1086
  HTMLMediaElement.prototype.play = function() { this.volume = 0; this.muted = true; return origPlay.call(this); };
843
1087
  });
844
1088
 
845
- // Intercept timedtext responses — filter by video ID to skip ad captions
846
1089
  let interceptedCaptions = null;
847
1090
  page.on('response', async (response) => {
848
1091
  const respUrl = response.url();
@@ -857,24 +1100,57 @@ async function browserTranscript(reqId, url, videoId, lang) {
857
1100
  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATE_TIMEOUT_MS });
858
1101
  await page.waitForTimeout(2000);
859
1102
 
860
- // Extract metadata from ytInitialPlayerResponse
1103
+ // Extract caption track URLs and metadata from ytInitialPlayerResponse
861
1104
  const meta = await page.evaluate(() => {
862
1105
  const r = window.ytInitialPlayerResponse || (typeof ytInitialPlayerResponse !== 'undefined' ? ytInitialPlayerResponse : null);
863
- if (!r) return { title: '' };
1106
+ if (!r) return { title: '', tracks: [] };
864
1107
  const tracks = r?.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
865
1108
  return {
866
1109
  title: r?.videoDetails?.title || '',
867
- languages: tracks.map(t => ({ code: t.languageCode, name: t.name?.simpleText || t.languageCode, kind: t.kind || 'manual' })),
1110
+ tracks: tracks.map(t => ({ code: t.languageCode, name: t.name?.simpleText || t.languageCode, kind: t.kind || 'manual', url: t.baseUrl })),
868
1111
  };
869
1112
  });
870
1113
 
871
- // Start playback to trigger caption loading
1114
+ log('info', 'youtube transcript: extracted caption tracks', { reqId, title: meta.title, trackCount: meta.tracks.length, tracks: meta.tracks.map(t => t.code) });
1115
+
1116
+ // Strategy A: Fetch caption track URL directly from ytInitialPlayerResponse
1117
+ // These URLs are freshly signed by YouTube and work immediately
1118
+ if (meta.tracks && meta.tracks.length > 0) {
1119
+ const track = meta.tracks.find(t => t.code === lang) || meta.tracks[0];
1120
+ if (track && track.url) {
1121
+ const captionUrl = track.url + (track.url.includes('?') ? '&' : '?') + 'fmt=json3';
1122
+ log('info', 'youtube transcript: fetching caption track', { reqId, lang: track.code, url: captionUrl.substring(0, 100) });
1123
+ try {
1124
+ const captionResp = await page.evaluate(async (fetchUrl) => {
1125
+ const resp = await fetch(fetchUrl);
1126
+ return resp.ok ? await resp.text() : null;
1127
+ }, captionUrl);
1128
+ if (captionResp && captionResp.length > 0) {
1129
+ let transcriptText = null;
1130
+ if (captionResp.trimStart().startsWith('{')) transcriptText = parseJson3(captionResp);
1131
+ else if (captionResp.includes('WEBVTT')) transcriptText = parseVtt(captionResp);
1132
+ else if (captionResp.includes('<text')) transcriptText = parseXml(captionResp);
1133
+ if (transcriptText && transcriptText.trim()) {
1134
+ return {
1135
+ status: 'ok', transcript: transcriptText,
1136
+ video_url: url, video_id: videoId, video_title: meta.title,
1137
+ language: track.code, total_words: transcriptText.split(/\s+/).length,
1138
+ available_languages: meta.tracks.map(t => ({ code: t.code, name: t.name, kind: t.kind })),
1139
+ };
1140
+ }
1141
+ }
1142
+ } catch (fetchErr) {
1143
+ log('warn', 'youtube transcript: caption track fetch failed', { reqId, error: fetchErr.message });
1144
+ }
1145
+ }
1146
+ }
1147
+
1148
+ // Strategy B: Play video and intercept timedtext network response
872
1149
  await page.evaluate(() => {
873
1150
  const v = document.querySelector('video');
874
1151
  if (v) { v.muted = true; v.play().catch(() => {}); }
875
1152
  }).catch(() => {});
876
1153
 
877
- // Wait up to 20s for the target video's captions (may need to sit through an ad)
878
1154
  for (let i = 0; i < 40 && !interceptedCaptions; i++) {
879
1155
  await page.waitForTimeout(500);
880
1156
  }
@@ -882,7 +1158,7 @@ async function browserTranscript(reqId, url, videoId, lang) {
882
1158
  if (!interceptedCaptions) {
883
1159
  return {
884
1160
  status: 'error', code: 404,
885
- message: 'No captions loaded during playback (video may have no captions, or ad blocked it)',
1161
+ message: 'No captions available for this video',
886
1162
  video_url: url, video_id: videoId, title: meta.title,
887
1163
  };
888
1164
  }
@@ -914,78 +1190,6 @@ async function browserTranscript(reqId, url, videoId, lang) {
914
1190
  });
915
1191
  }
916
1192
 
917
- // --- YouTube transcript parsers ---
918
-
919
- function parseJson3(content) {
920
- try {
921
- const data = JSON.parse(content);
922
- const events = data.events || [];
923
- const lines = [];
924
- for (const event of events) {
925
- const segs = event.segs || [];
926
- if (!segs.length) continue;
927
- const text = segs.map(s => s.utf8 || '').join('').trim();
928
- if (!text) continue;
929
- const tsMs = event.tStartMs || 0;
930
- const tsSec = Math.floor(tsMs / 1000);
931
- const mm = Math.floor(tsSec / 60);
932
- const ss = tsSec % 60;
933
- lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
934
- }
935
- return lines.join('\n');
936
- } catch (e) {
937
- return null;
938
- }
939
- }
940
-
941
- function parseVtt(content) {
942
- const lines = content.split('\n');
943
- const result = [];
944
- let currentTimestamp = '';
945
- for (const line of lines) {
946
- const stripped = line.trim();
947
- if (!stripped || stripped === 'WEBVTT' || stripped.startsWith('Kind:') || stripped.startsWith('Language:') || stripped.startsWith('NOTE')) continue;
948
- if (stripped.includes(' --> ')) {
949
- const parts = stripped.split(' --> ');
950
- if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
951
- continue;
952
- }
953
- const text = stripped.replace(/<[^>]+>/g, '').replace(/&amp;/g, '&').replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&quot;/g, '"').replace(/&#39;/g, "'").trim();
954
- if (text && currentTimestamp) { result.push(`[${currentTimestamp}] ${text}`); currentTimestamp = ''; }
955
- else if (text) result.push(text);
956
- }
957
- return result.join('\n');
958
- }
959
-
960
- function parseXml(content) {
961
- const lines = [];
962
- const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
963
- let match;
964
- while ((match = regex.exec(content)) !== null) {
965
- const startSec = parseFloat(match[1]) || 0;
966
- const text = match[2].replace(/<[^>]+>/g, '').replace(/&amp;/g, '&').replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&quot;/g, '"').replace(/&#39;/g, "'").trim();
967
- if (!text) continue;
968
- const mm = Math.floor(startSec / 60);
969
- const ss = Math.floor(startSec % 60);
970
- lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
971
- }
972
- return lines.join('\n');
973
- }
974
-
975
- function formatVttTs(ts) {
976
- const parts = ts.split(':');
977
- if (parts.length >= 3) {
978
- const hours = parseInt(parts[0]) || 0;
979
- const minutes = parseInt(parts[1]) || 0;
980
- const totalMin = hours * 60 + minutes;
981
- const seconds = (parts[2] || '00').split('.')[0];
982
- return `${String(totalMin).padStart(2, '0')}:${seconds}`;
983
- } else if (parts.length === 2) {
984
- return `${String(parseInt(parts[0])).padStart(2, '0')}:${(parts[1] || '00').split('.')[0]}`;
985
- }
986
- return ts;
987
- }
988
-
989
1193
  app.get('/health', (req, res) => {
990
1194
  if (healthState.isRecovering) {
991
1195
  return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
@@ -1011,33 +1215,42 @@ app.post('/tabs', async (req, res) => {
1011
1215
  return res.status(400).json({ error: 'userId and sessionKey required' });
1012
1216
  }
1013
1217
 
1014
- const session = await getSession(userId);
1015
-
1016
- let totalTabs = 0;
1017
- for (const group of session.tabGroups.values()) totalTabs += group.size;
1018
- if (totalTabs >= MAX_TABS_PER_SESSION) {
1019
- return res.status(429).json({ error: 'Maximum tabs per session reached' });
1020
- }
1021
-
1022
- const group = getTabGroup(session, resolvedSessionKey);
1023
-
1024
- const page = await session.context.newPage();
1025
- const tabId = crypto.randomUUID();
1026
- const tabState = createTabState(page);
1027
- group.set(tabId, tabState);
1028
-
1029
- if (url) {
1030
- const urlErr = validateUrl(url);
1031
- if (urlErr) return res.status(400).json({ error: urlErr });
1032
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
1033
- tabState.visitedUrls.add(url);
1034
- }
1035
-
1036
- log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
1037
- res.json({ tabId, url: page.url() });
1218
+ const result = await withTimeout((async () => {
1219
+ const session = await getSession(userId);
1220
+
1221
+ let totalTabs = 0;
1222
+ for (const group of session.tabGroups.values()) totalTabs += group.size;
1223
+ if (totalTabs >= MAX_TABS_PER_SESSION) {
1224
+ throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
1225
+ }
1226
+
1227
+ if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
1228
+ throw Object.assign(new Error('Maximum global tabs reached'), { statusCode: 429 });
1229
+ }
1230
+
1231
+ const group = getTabGroup(session, resolvedSessionKey);
1232
+
1233
+ const page = await session.context.newPage();
1234
+ const tabId = crypto.randomUUID();
1235
+ const tabState = createTabState(page);
1236
+ attachDownloadListener(tabState, tabId);
1237
+ group.set(tabId, tabState);
1238
+
1239
+ if (url) {
1240
+ const urlErr = validateUrl(url);
1241
+ if (urlErr) throw Object.assign(new Error(urlErr), { statusCode: 400 });
1242
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
1243
+ tabState.visitedUrls.add(url);
1244
+ }
1245
+
1246
+ log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
1247
+ return { tabId, url: page.url() };
1248
+ })(), HANDLER_TIMEOUT_MS, 'tab create');
1249
+
1250
+ res.json(result);
1038
1251
  } catch (err) {
1039
1252
  log('error', 'tab create failed', { reqId: req.reqId, error: err.message });
1040
- res.status(500).json({ error: safeError(err) });
1253
+ handleRouteError(err, req, res);
1041
1254
  }
1042
1255
  });
1043
1256
 
@@ -1079,7 +1292,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1079
1292
  const group = getTabGroup(session, resolvedSessionKey);
1080
1293
  if (oldestGroup) oldestGroup.delete(oldestTabId);
1081
1294
  group.set(tabId, tabState);
1082
- tabLocks.delete(oldestTabId);
1295
+ { const _l = tabLocks.get(oldestTabId); if (_l) _l.drain(); tabLocks.delete(oldestTabId); }
1083
1296
  log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
1084
1297
  } else {
1085
1298
  throw new Error('Maximum tabs per session reached');
@@ -1087,6 +1300,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1087
1300
  } else {
1088
1301
  const page = await session.context.newPage();
1089
1302
  tabState = createTabState(page);
1303
+ attachDownloadListener(tabState, tabId, log);
1090
1304
  const group = getTabGroup(session, resolvedSessionKey);
1091
1305
  group.set(tabId, tabState);
1092
1306
  log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
@@ -1094,7 +1308,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1094
1308
  } else {
1095
1309
  tabState = found.tabState;
1096
1310
  }
1097
- tabState.toolCalls++;
1311
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1098
1312
 
1099
1313
  let targetUrl = url;
1100
1314
  if (macro) {
@@ -1110,6 +1324,15 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1110
1324
  await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
1111
1325
  tabState.visitedUrls.add(targetUrl);
1112
1326
  tabState.lastSnapshot = null;
1327
+
1328
+ // For Google SERP: skip eager ref building during navigate.
1329
+ // Results render asynchronously after DOMContentLoaded — the snapshot
1330
+ // call will wait for and extract them.
1331
+ if (isGoogleSerp(tabState.page.url())) {
1332
+ tabState.refs = new Map();
1333
+ return { ok: true, tabId, url: tabState.page.url(), refsAvailable: false, googleSerp: true };
1334
+ }
1335
+
1113
1336
  tabState.refs = await buildRefs(tabState.page);
1114
1337
  return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
1115
1338
  });
@@ -1120,7 +1343,10 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
1120
1343
  } catch (err) {
1121
1344
  log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
1122
1345
  const status = err.message && err.message.startsWith('Blocked URL scheme') ? 400 : 500;
1123
- res.status(status).json({ error: safeError(err) });
1346
+ if (status === 400) {
1347
+ return res.status(400).json({ error: safeError(err) });
1348
+ }
1349
+ handleRouteError(err, req, res);
1124
1350
  }
1125
1351
  });
1126
1352
 
@@ -1136,7 +1362,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1136
1362
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1137
1363
 
1138
1364
  const { tabState } = found;
1139
- tabState.toolCalls++;
1365
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1140
1366
 
1141
1367
  // Cached chunk retrieval for offset>0 requests
1142
1368
  if (offset > 0 && tabState.lastSnapshot) {
@@ -1151,6 +1377,31 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1151
1377
  }
1152
1378
 
1153
1379
  const result = await withUserLimit(userId, () => withTimeout((async () => {
1380
+ const pageUrl = tabState.page.url();
1381
+
1382
+ // Google SERP fast path — DOM extraction instead of ariaSnapshot
1383
+ if (isGoogleSerp(pageUrl)) {
1384
+ const { refs: googleRefs, snapshot: googleSnapshot } = await extractGoogleSerp(tabState.page);
1385
+ tabState.refs = googleRefs;
1386
+ tabState.lastSnapshot = googleSnapshot;
1387
+ const annotatedYaml = googleSnapshot;
1388
+ const win = windowSnapshot(annotatedYaml, 0);
1389
+ const response = {
1390
+ url: pageUrl,
1391
+ snapshot: win.text,
1392
+ refsCount: tabState.refs.size,
1393
+ truncated: win.truncated,
1394
+ totalChars: win.totalChars,
1395
+ hasMore: win.hasMore,
1396
+ nextOffset: win.nextOffset,
1397
+ };
1398
+ if (req.query.includeScreenshot === 'true') {
1399
+ const pngBuffer = await tabState.page.screenshot({ type: 'png' });
1400
+ response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
1401
+ }
1402
+ return response;
1403
+ }
1404
+
1154
1405
  tabState.refs = await buildRefs(tabState.page);
1155
1406
  const ariaYaml = await getAriaSnapshot(tabState.page);
1156
1407
 
@@ -1213,7 +1464,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
1213
1464
  res.json(result);
1214
1465
  } catch (err) {
1215
1466
  log('error', 'snapshot failed', { reqId: req.reqId, tabId: req.params.tabId, error: err.message });
1216
- res.status(500).json({ error: safeError(err) });
1467
+ handleRouteError(err, req, res);
1217
1468
  }
1218
1469
  });
1219
1470
 
@@ -1231,7 +1482,7 @@ app.post('/tabs/:tabId/wait', async (req, res) => {
1231
1482
  res.json({ ok: true, ready });
1232
1483
  } catch (err) {
1233
1484
  log('error', 'wait failed', { reqId: req.reqId, error: err.message });
1234
- res.status(500).json({ error: safeError(err) });
1485
+ handleRouteError(err, req, res);
1235
1486
  }
1236
1487
  });
1237
1488
 
@@ -1247,13 +1498,15 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1247
1498
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1248
1499
 
1249
1500
  const { tabState } = found;
1250
- tabState.toolCalls++;
1501
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1251
1502
 
1252
1503
  if (!ref && !selector) {
1253
1504
  return res.status(400).json({ error: 'ref or selector required' });
1254
1505
  }
1255
1506
 
1256
- const result = await withUserLimit(userId, () => withTimeout(withTabLock(tabId, async () => {
1507
+ const result = await withUserLimit(userId, () => withTabLock(tabId, async () => {
1508
+ const clickStart = Date.now();
1509
+ const remainingBudget = () => Math.max(0, HANDLER_TIMEOUT_MS - 2000 - (Date.now() - clickStart));
1257
1510
  // Full mouse event sequence for stubborn JS click handlers (mirrors Swift WebView.swift)
1258
1511
  // Dispatches: mouseover → mouseenter → mousedown → mouseup → click
1259
1512
  const dispatchMouseSequence = async (locator) => {
@@ -1275,18 +1528,32 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1275
1528
  log('info', 'mouse sequence dispatched', { x: x.toFixed(0), y: y.toFixed(0) });
1276
1529
  };
1277
1530
 
1531
+ // On Google SERPs, skip the normal click attempt (always intercepted by overlays)
1532
+ // and go directly to force click — saves 5s timeout per click
1533
+ const onGoogleSerp = isGoogleSerp(tabState.page.url());
1534
+
1278
1535
  const doClick = async (locatorOrSelector, isLocator) => {
1279
1536
  const locator = isLocator ? locatorOrSelector : tabState.page.locator(locatorOrSelector);
1280
1537
 
1538
+ if (onGoogleSerp) {
1539
+ try {
1540
+ await locator.click({ timeout: 3000, force: true });
1541
+ } catch (forceErr) {
1542
+ log('warn', 'google force click failed, trying mouse sequence');
1543
+ await dispatchMouseSequence(locator);
1544
+ }
1545
+ return;
1546
+ }
1547
+
1281
1548
  try {
1282
1549
  // First try normal click (respects visibility, enabled, not-obscured)
1283
- await locator.click({ timeout: 5000 });
1550
+ await locator.click({ timeout: 3000 });
1284
1551
  } catch (err) {
1285
1552
  // Fallback 1: If intercepted by overlay, retry with force
1286
1553
  if (err.message.includes('intercepts pointer events')) {
1287
1554
  log('warn', 'click intercepted, retrying with force');
1288
1555
  try {
1289
- await locator.click({ timeout: 5000, force: true });
1556
+ await locator.click({ timeout: 3000, force: true });
1290
1557
  } catch (forceErr) {
1291
1558
  // Fallback 2: Full mouse event sequence for stubborn JS handlers
1292
1559
  log('warn', 'force click failed, trying mouse sequence');
@@ -1304,35 +1571,93 @@ app.post('/tabs/:tabId/click', async (req, res) => {
1304
1571
 
1305
1572
  if (ref) {
1306
1573
  let locator = refToLocator(tabState.page, ref, tabState.refs);
1307
- if (!locator && tabState.refs.size === 0) {
1308
- // Auto-refresh refs on stale state before failing
1309
- log('info', 'auto-refreshing stale refs before click', { ref });
1310
- tabState.refs = await buildRefs(tabState.page);
1574
+ if (!locator) {
1575
+ // Use tight timeout (4s max) to leave budget for click + post-click buildRefs
1576
+ log('info', 'auto-refreshing refs before click', { ref, hadRefs: tabState.refs.size });
1577
+ try {
1578
+ const preClickBudget = Math.min(4000, remainingBudget());
1579
+ const refreshPromise = buildRefs(tabState.page);
1580
+ const refreshBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('pre_click_refs_timeout')), preClickBudget));
1581
+ tabState.refs = await Promise.race([refreshPromise, refreshBudget]);
1582
+ } catch (e) {
1583
+ if (e.message === 'pre_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1584
+ log('warn', 'pre-click buildRefs timed out, proceeding without refresh');
1585
+ } else {
1586
+ throw e;
1587
+ }
1588
+ }
1311
1589
  locator = refToLocator(tabState.page, ref, tabState.refs);
1312
1590
  }
1313
1591
  if (!locator) {
1314
1592
  const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none';
1315
- throw new Error(`Unknown ref: ${ref} (valid refs: e1-${maxRef}, ${tabState.refs.size} total). Refs reset after navigation - call snapshot first.`);
1593
+ throw new StaleRefsError(ref, maxRef, tabState.refs.size);
1316
1594
  }
1317
1595
  await doClick(locator, true);
1318
1596
  } else {
1319
1597
  await doClick(selector, false);
1320
1598
  }
1321
1599
 
1322
- await tabState.page.waitForTimeout(500);
1600
+ // If clicking on a Google SERP, wait for potential navigation to complete
1601
+ if (onGoogleSerp) {
1602
+ try {
1603
+ await tabState.page.waitForLoadState('domcontentloaded', { timeout: 3000 });
1604
+ } catch {}
1605
+ await tabState.page.waitForTimeout(200);
1606
+ // Skip buildRefs here — SERP clicks typically navigate to a new page,
1607
+ // and the caller always requests /snapshot next which rebuilds refs.
1608
+ tabState.lastSnapshot = null;
1609
+ tabState.refs = new Map();
1610
+ const newUrl = tabState.page.url();
1611
+ tabState.visitedUrls.add(newUrl);
1612
+ return { ok: true, url: newUrl, refsAvailable: false };
1613
+ } else {
1614
+ await tabState.page.waitForTimeout(500);
1615
+ }
1323
1616
  tabState.lastSnapshot = null;
1324
- tabState.refs = await buildRefs(tabState.page);
1617
+ // buildRefs after click — use remaining budget (min 2s) so we don't blow the handler timeout.
1618
+ // If it times out, return without refs (caller's next /snapshot will rebuild them).
1619
+ const postClickBudget = Math.max(2000, remainingBudget());
1620
+ try {
1621
+ const refsPromise = buildRefs(tabState.page);
1622
+ const refsBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('post_click_refs_timeout')), postClickBudget));
1623
+ tabState.refs = await Promise.race([refsPromise, refsBudget]);
1624
+ } catch (e) {
1625
+ if (e.message === 'post_click_refs_timeout' || e.message === 'buildRefs_timeout') {
1626
+ log('warn', 'post-click buildRefs timed out, returning without refs', { budget: postClickBudget, elapsed: Date.now() - clickStart });
1627
+ tabState.refs = new Map();
1628
+ } else {
1629
+ throw e;
1630
+ }
1631
+ }
1325
1632
 
1326
1633
  const newUrl = tabState.page.url();
1327
1634
  tabState.visitedUrls.add(newUrl);
1328
1635
  return { ok: true, url: newUrl, refsAvailable: tabState.refs.size > 0 };
1329
- }), HANDLER_TIMEOUT_MS, 'click'));
1636
+ }));
1330
1637
 
1331
1638
  log('info', 'clicked', { reqId: req.reqId, tabId, url: result.url });
1332
1639
  res.json(result);
1333
1640
  } catch (err) {
1334
1641
  log('error', 'click failed', { reqId: req.reqId, tabId, error: err.message });
1335
- res.status(500).json({ error: safeError(err) });
1642
+ if (err.message?.includes('timed out')) {
1643
+ try {
1644
+ const session = sessions.get(normalizeUserId(req.body.userId));
1645
+ const found = session && findTab(session, tabId);
1646
+ if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1647
+ found.tabState.refs = await buildRefs(found.tabState.page);
1648
+ found.tabState.lastSnapshot = null;
1649
+ return res.status(500).json({
1650
+ error: safeError(err),
1651
+ hint: 'The page may have changed. Call snapshot to see the current state and retry.',
1652
+ url: found.tabState.page.url(),
1653
+ refsCount: found.tabState.refs.size,
1654
+ });
1655
+ }
1656
+ } catch (refreshErr) {
1657
+ log('warn', 'post-timeout refresh failed', { error: refreshErr.message });
1658
+ }
1659
+ }
1660
+ handleRouteError(err, req, res);
1336
1661
  }
1337
1662
  });
1338
1663
 
@@ -1347,7 +1672,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1347
1672
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1348
1673
 
1349
1674
  const { tabState } = found;
1350
- tabState.toolCalls++;
1675
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1351
1676
 
1352
1677
  if (!ref && !selector) {
1353
1678
  return res.status(400).json({ error: 'ref or selector required' });
@@ -1355,8 +1680,13 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1355
1680
 
1356
1681
  await withTabLock(tabId, async () => {
1357
1682
  if (ref) {
1358
- const locator = refToLocator(tabState.page, ref, tabState.refs);
1359
- if (!locator) throw new Error(`Unknown ref: ${ref}`);
1683
+ let locator = refToLocator(tabState.page, ref, tabState.refs);
1684
+ if (!locator) {
1685
+ log('info', 'auto-refreshing refs before fill', { ref, hadRefs: tabState.refs.size });
1686
+ tabState.refs = await buildRefs(tabState.page);
1687
+ locator = refToLocator(tabState.page, ref, tabState.refs);
1688
+ }
1689
+ if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
1360
1690
  await locator.fill(text, { timeout: 10000 });
1361
1691
  } else {
1362
1692
  await tabState.page.fill(selector, text, { timeout: 10000 });
@@ -1366,7 +1696,25 @@ app.post('/tabs/:tabId/type', async (req, res) => {
1366
1696
  res.json({ ok: true });
1367
1697
  } catch (err) {
1368
1698
  log('error', 'type failed', { reqId: req.reqId, error: err.message });
1369
- res.status(500).json({ error: safeError(err) });
1699
+ if (err.message?.includes('timed out') || err.message?.includes('not an <input>')) {
1700
+ try {
1701
+ const session = sessions.get(normalizeUserId(req.body.userId));
1702
+ const found = session && findTab(session, tabId);
1703
+ if (found?.tabState?.page && !found.tabState.page.isClosed()) {
1704
+ found.tabState.refs = await buildRefs(found.tabState.page);
1705
+ found.tabState.lastSnapshot = null;
1706
+ return res.status(500).json({
1707
+ error: safeError(err),
1708
+ hint: 'The page may have changed. Call snapshot to see the current state and retry.',
1709
+ url: found.tabState.page.url(),
1710
+ refsCount: found.tabState.refs.size,
1711
+ });
1712
+ }
1713
+ } catch (refreshErr) {
1714
+ log('warn', 'post-timeout refresh failed', { error: refreshErr.message });
1715
+ }
1716
+ }
1717
+ handleRouteError(err, req, res);
1370
1718
  }
1371
1719
  });
1372
1720
 
@@ -1381,7 +1729,7 @@ app.post('/tabs/:tabId/press', async (req, res) => {
1381
1729
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1382
1730
 
1383
1731
  const { tabState } = found;
1384
- tabState.toolCalls++;
1732
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1385
1733
 
1386
1734
  await withTabLock(tabId, async () => {
1387
1735
  await tabState.page.keyboard.press(key);
@@ -1390,7 +1738,7 @@ app.post('/tabs/:tabId/press', async (req, res) => {
1390
1738
  res.json({ ok: true });
1391
1739
  } catch (err) {
1392
1740
  log('error', 'press failed', { reqId: req.reqId, error: err.message });
1393
- res.status(500).json({ error: safeError(err) });
1741
+ handleRouteError(err, req, res);
1394
1742
  }
1395
1743
  });
1396
1744
 
@@ -1403,7 +1751,7 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
1403
1751
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1404
1752
 
1405
1753
  const { tabState } = found;
1406
- tabState.toolCalls++;
1754
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1407
1755
 
1408
1756
  const delta = direction === 'up' ? -amount : amount;
1409
1757
  await tabState.page.mouse.wheel(0, delta);
@@ -1412,7 +1760,7 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
1412
1760
  res.json({ ok: true });
1413
1761
  } catch (err) {
1414
1762
  log('error', 'scroll failed', { reqId: req.reqId, error: err.message });
1415
- res.status(500).json({ error: safeError(err) });
1763
+ handleRouteError(err, req, res);
1416
1764
  }
1417
1765
  });
1418
1766
 
@@ -1427,18 +1775,18 @@ app.post('/tabs/:tabId/back', async (req, res) => {
1427
1775
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1428
1776
 
1429
1777
  const { tabState } = found;
1430
- tabState.toolCalls++;
1778
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1431
1779
 
1432
- const result = await withTimeout(withTabLock(tabId, async () => {
1780
+ const result = await withTabLock(tabId, async () => {
1433
1781
  await tabState.page.goBack({ timeout: 10000 });
1434
1782
  tabState.refs = await buildRefs(tabState.page);
1435
1783
  return { ok: true, url: tabState.page.url() };
1436
- }), HANDLER_TIMEOUT_MS, 'back');
1784
+ });
1437
1785
 
1438
1786
  res.json(result);
1439
1787
  } catch (err) {
1440
1788
  log('error', 'back failed', { reqId: req.reqId, error: err.message });
1441
- res.status(500).json({ error: safeError(err) });
1789
+ handleRouteError(err, req, res);
1442
1790
  }
1443
1791
  });
1444
1792
 
@@ -1453,18 +1801,18 @@ app.post('/tabs/:tabId/forward', async (req, res) => {
1453
1801
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1454
1802
 
1455
1803
  const { tabState } = found;
1456
- tabState.toolCalls++;
1804
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1457
1805
 
1458
- const result = await withTimeout(withTabLock(tabId, async () => {
1806
+ const result = await withTabLock(tabId, async () => {
1459
1807
  await tabState.page.goForward({ timeout: 10000 });
1460
1808
  tabState.refs = await buildRefs(tabState.page);
1461
1809
  return { ok: true, url: tabState.page.url() };
1462
- }), HANDLER_TIMEOUT_MS, 'forward');
1810
+ });
1463
1811
 
1464
1812
  res.json(result);
1465
1813
  } catch (err) {
1466
1814
  log('error', 'forward failed', { reqId: req.reqId, error: err.message });
1467
- res.status(500).json({ error: safeError(err) });
1815
+ handleRouteError(err, req, res);
1468
1816
  }
1469
1817
  });
1470
1818
 
@@ -1479,18 +1827,18 @@ app.post('/tabs/:tabId/refresh', async (req, res) => {
1479
1827
  if (!found) return res.status(404).json({ error: 'Tab not found' });
1480
1828
 
1481
1829
  const { tabState } = found;
1482
- tabState.toolCalls++;
1830
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1483
1831
 
1484
- const result = await withTimeout(withTabLock(tabId, async () => {
1832
+ const result = await withTabLock(tabId, async () => {
1485
1833
  await tabState.page.reload({ timeout: 30000 });
1486
1834
  tabState.refs = await buildRefs(tabState.page);
1487
1835
  return { ok: true, url: tabState.page.url() };
1488
- }), HANDLER_TIMEOUT_MS, 'refresh');
1836
+ });
1489
1837
 
1490
1838
  res.json(result);
1491
1839
  } catch (err) {
1492
1840
  log('error', 'refresh failed', { reqId: req.reqId, error: err.message });
1493
- res.status(500).json({ error: safeError(err) });
1841
+ handleRouteError(err, req, res);
1494
1842
  }
1495
1843
  });
1496
1844
 
@@ -1508,7 +1856,7 @@ app.get('/tabs/:tabId/links', async (req, res) => {
1508
1856
  }
1509
1857
 
1510
1858
  const { tabState } = found;
1511
- tabState.toolCalls++;
1859
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1512
1860
 
1513
1861
  const allLinks = await tabState.page.evaluate(() => {
1514
1862
  const links = [];
@@ -1531,6 +1879,59 @@ app.get('/tabs/:tabId/links', async (req, res) => {
1531
1879
  });
1532
1880
  } catch (err) {
1533
1881
  log('error', 'links failed', { reqId: req.reqId, error: err.message });
1882
+ handleRouteError(err, req, res);
1883
+ }
1884
+ });
1885
+
1886
+ // Get captured downloads
1887
+ app.get('/tabs/:tabId/downloads', async (req, res) => {
1888
+ try {
1889
+ const userId = req.query.userId;
1890
+ const includeData = req.query.includeData === 'true';
1891
+ const consume = req.query.consume === 'true';
1892
+ const maxBytesRaw = Number(req.query.maxBytes);
1893
+ const maxBytes = Number.isFinite(maxBytesRaw) && maxBytesRaw > 0 ? maxBytesRaw : MAX_DOWNLOAD_INLINE_BYTES;
1894
+ const session = sessions.get(normalizeUserId(userId));
1895
+ const found = session && findTab(session, req.params.tabId);
1896
+ if (!found) return res.status(404).json({ error: 'Tab not found' });
1897
+
1898
+ const { tabState } = found;
1899
+ tabState.toolCalls++;
1900
+
1901
+ const downloads = await getDownloadsList(tabState, { includeData, maxBytes });
1902
+
1903
+ if (consume) {
1904
+ await clearTabDownloads(tabState);
1905
+ }
1906
+
1907
+ res.json({ tabId: req.params.tabId, downloads });
1908
+ } catch (err) {
1909
+ log('error', 'downloads failed', { reqId: req.reqId, error: err.message });
1910
+ res.status(500).json({ error: safeError(err) });
1911
+ }
1912
+ });
1913
+
1914
+ // Get image elements from current page
1915
+ app.get('/tabs/:tabId/images', async (req, res) => {
1916
+ try {
1917
+ const userId = req.query.userId;
1918
+ const includeData = req.query.includeData === 'true';
1919
+ const maxBytesRaw = Number(req.query.maxBytes);
1920
+ const limitRaw = Number(req.query.limit);
1921
+ const maxBytes = Number.isFinite(maxBytesRaw) && maxBytesRaw > 0 ? maxBytesRaw : MAX_DOWNLOAD_INLINE_BYTES;
1922
+ const limit = Number.isFinite(limitRaw) && limitRaw > 0 ? Math.min(Math.floor(limitRaw), 20) : 8;
1923
+ const session = sessions.get(normalizeUserId(userId));
1924
+ const found = session && findTab(session, req.params.tabId);
1925
+ if (!found) return res.status(404).json({ error: 'Tab not found' });
1926
+
1927
+ const { tabState } = found;
1928
+ tabState.toolCalls++;
1929
+
1930
+ const images = await extractPageImages(tabState.page, { includeData, maxBytes, limit });
1931
+
1932
+ res.json({ tabId: req.params.tabId, images });
1933
+ } catch (err) {
1934
+ log('error', 'images failed', { reqId: req.reqId, error: err.message });
1534
1935
  res.status(500).json({ error: safeError(err) });
1535
1936
  }
1536
1937
  });
@@ -1550,7 +1951,7 @@ app.get('/tabs/:tabId/screenshot', async (req, res) => {
1550
1951
  res.send(buffer);
1551
1952
  } catch (err) {
1552
1953
  log('error', 'screenshot failed', { reqId: req.reqId, error: err.message });
1553
- res.status(500).json({ error: safeError(err) });
1954
+ handleRouteError(err, req, res);
1554
1955
  }
1555
1956
  });
1556
1957
 
@@ -1569,11 +1970,36 @@ app.get('/tabs/:tabId/stats', async (req, res) => {
1569
1970
  listItemId, // Legacy compatibility
1570
1971
  url: tabState.page.url(),
1571
1972
  visitedUrls: Array.from(tabState.visitedUrls),
1973
+ downloadsCount: Array.isArray(tabState.downloads) ? tabState.downloads.length : 0,
1572
1974
  toolCalls: tabState.toolCalls,
1573
1975
  refsCount: tabState.refs.size
1574
1976
  });
1575
1977
  } catch (err) {
1576
1978
  log('error', 'stats failed', { reqId: req.reqId, error: err.message });
1979
+ handleRouteError(err, req, res);
1980
+ }
1981
+ });
1982
+
1983
+ // Evaluate JavaScript in page context
1984
+ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, res) => {
1985
+ try {
1986
+ const { userId, expression } = req.body;
1987
+ if (!userId) return res.status(400).json({ error: 'userId is required' });
1988
+ if (!expression) return res.status(400).json({ error: 'expression is required' });
1989
+
1990
+ const session = sessions.get(normalizeUserId(userId));
1991
+ const found = session && findTab(session, req.params.tabId);
1992
+ if (!found) return res.status(404).json({ error: 'Tab not found' });
1993
+
1994
+ session.lastAccess = Date.now();
1995
+ const { tabState } = found;
1996
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1997
+
1998
+ const result = await tabState.page.evaluate(expression);
1999
+ log('info', 'evaluate', { reqId: req.reqId, tabId: req.params.tabId, userId, resultType: typeof result });
2000
+ res.json({ ok: true, result });
2001
+ } catch (err) {
2002
+ log('error', 'evaluate failed', { reqId: req.reqId, error: err.message });
1577
2003
  res.status(500).json({ error: safeError(err) });
1578
2004
  }
1579
2005
  });
@@ -1585,9 +2011,10 @@ app.delete('/tabs/:tabId', async (req, res) => {
1585
2011
  const session = sessions.get(normalizeUserId(userId));
1586
2012
  const found = session && findTab(session, req.params.tabId);
1587
2013
  if (found) {
2014
+ await clearTabDownloads(found.tabState);
1588
2015
  await safePageClose(found.tabState.page);
1589
2016
  found.group.delete(req.params.tabId);
1590
- tabLocks.delete(req.params.tabId);
2017
+ { const _l = tabLocks.get(req.params.tabId); if (_l) _l.drain(); tabLocks.delete(req.params.tabId); }
1591
2018
  if (found.group.size === 0) {
1592
2019
  session.tabGroups.delete(found.listItemId);
1593
2020
  }
@@ -1596,7 +2023,7 @@ app.delete('/tabs/:tabId', async (req, res) => {
1596
2023
  res.json({ ok: true });
1597
2024
  } catch (err) {
1598
2025
  log('error', 'tab close failed', { reqId: req.reqId, error: err.message });
1599
- res.status(500).json({ error: safeError(err) });
2026
+ handleRouteError(err, req, res);
1600
2027
  }
1601
2028
  });
1602
2029
 
@@ -1608,6 +2035,7 @@ app.delete('/tabs/group/:listItemId', async (req, res) => {
1608
2035
  const group = session?.tabGroups.get(req.params.listItemId);
1609
2036
  if (group) {
1610
2037
  for (const [tabId, tabState] of group) {
2038
+ await clearTabDownloads(tabState);
1611
2039
  await safePageClose(tabState.page);
1612
2040
  tabLocks.delete(tabId);
1613
2041
  }
@@ -1617,7 +2045,7 @@ app.delete('/tabs/group/:listItemId', async (req, res) => {
1617
2045
  res.json({ ok: true });
1618
2046
  } catch (err) {
1619
2047
  log('error', 'tab group close failed', { reqId: req.reqId, error: err.message });
1620
- res.status(500).json({ error: safeError(err) });
2048
+ handleRouteError(err, req, res);
1621
2049
  }
1622
2050
  });
1623
2051
 
@@ -1627,6 +2055,7 @@ app.delete('/sessions/:userId', async (req, res) => {
1627
2055
  const userId = normalizeUserId(req.params.userId);
1628
2056
  const session = sessions.get(userId);
1629
2057
  if (session) {
2058
+ await clearSessionDownloads(session);
1630
2059
  await session.context.close();
1631
2060
  sessions.delete(userId);
1632
2061
  log('info', 'session closed', { userId });
@@ -1635,7 +2064,7 @@ app.delete('/sessions/:userId', async (req, res) => {
1635
2064
  res.json({ ok: true });
1636
2065
  } catch (err) {
1637
2066
  log('error', 'session close failed', { error: err.message });
1638
- res.status(500).json({ error: safeError(err) });
2067
+ handleRouteError(err, req, res);
1639
2068
  }
1640
2069
  });
1641
2070
 
@@ -1644,6 +2073,7 @@ setInterval(() => {
1644
2073
  const now = Date.now();
1645
2074
  for (const [userId, session] of sessions) {
1646
2075
  if (now - session.lastAccess > SESSION_TIMEOUT_MS) {
2076
+ clearSessionDownloads(session).catch(() => {});
1647
2077
  session.context.close().catch(() => {});
1648
2078
  sessions.delete(userId);
1649
2079
  log('info', 'session expired', { userId });
@@ -1655,6 +2085,37 @@ setInterval(() => {
1655
2085
  }
1656
2086
  }, 60_000);
1657
2087
 
// Per-tab inactivity reaper — close tabs idle for TAB_INACTIVITY_MS.
//
// Runs every 60s. A tab counts as idle while tabState.toolCalls is unchanged
// from the value recorded on tabState._lastReaperToolCalls; the idle clock
// (tabState._lastReaperCheck) restarts whenever the counter moves. Because
// idleness is sampled, a tab is reaped on the first tick at which it has been
// idle for at least TAB_INACTIVITY_MS.
setInterval(() => {
  const now = Date.now();

  // Releases what a reaped tab holds. Runs detached from the sweep so a slow
  // or failing cleanup cannot stall the tick or become an unhandled rejection.
  // Downloads are cleared before the page is closed, matching the order used
  // by the explicit tab-close routes.
  const releaseTab = async (tabId, tabState) => {
    try {
      await clearTabDownloads(tabState);
    } catch (err) {
      log('warn', 'tab reap download cleanup failed', { tabId, error: err?.message });
    }
    try {
      await safePageClose(tabState.page);
    } catch (err) {
      log('warn', 'tab reap page close failed', { tabId, error: err?.message });
    }
  };

  for (const [userId, session] of sessions) {
    for (const [listItemId, group] of session.tabGroups) {
      for (const [tabId, tabState] of group) {
        const sawActivity = tabState.toolCalls !== tabState._lastReaperToolCalls;

        // First sighting of this tab, or it was used since the last tick:
        // (re)start the idle clock and move on.
        if (!tabState._lastReaperCheck || sawActivity) {
          tabState._lastReaperCheck = now;
          tabState._lastReaperToolCalls = tabState.toolCalls;
          continue;
        }

        const idleMs = now - tabState._lastReaperCheck;
        if (idleMs < TAB_INACTIVITY_MS) continue;

        log('info', 'tab reaped (inactive)', { userId, tabId, listItemId, idleMs, toolCalls: tabState.toolCalls });

        // Kick off resource cleanup, then drop the bookkeeping synchronously.
        // Deleting the current entry while iterating a Map is safe in JS.
        void releaseTab(tabId, tabState);
        group.delete(tabId);
        // drain() is invoked on the tab's lock before it is forgotten, as the
        // other close paths do. NOTE(review): presumably this releases queued
        // waiters — confirm against the lock implementation.
        tabLocks.get(tabId)?.drain();
        tabLocks.delete(tabId);
      }

      if (group.size === 0) {
        session.tabGroups.delete(listItemId);
      }
    }
  }
}, 60_000);
2118
+
1658
2119
  // =============================================================================
1659
2120
  // OpenClaw-compatible endpoint aliases
1660
2121
  // These allow camoufox to be used as a profile backend for OpenClaw's browser tool
@@ -1699,7 +2160,7 @@ app.get('/tabs', async (req, res) => {
1699
2160
  res.json({ running: true, tabs });
1700
2161
  } catch (err) {
1701
2162
  log('error', 'list tabs failed', { reqId: req.reqId, error: err.message });
1702
- res.status(500).json({ error: safeError(err) });
2163
+ handleRouteError(err, req, res);
1703
2164
  }
1704
2165
  });
1705
2166
 
@@ -1719,6 +2180,11 @@ app.post('/tabs/open', async (req, res) => {
1719
2180
 
1720
2181
  const session = await getSession(userId);
1721
2182
 
2183
+ // Check global tab limit first
2184
+ if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
2185
+ return res.status(429).json({ error: 'Maximum global tabs reached' });
2186
+ }
2187
+
1722
2188
  let totalTabs = 0;
1723
2189
  for (const g of session.tabGroups.values()) totalTabs += g.size;
1724
2190
  if (totalTabs >= MAX_TABS_PER_SESSION) {
@@ -1730,6 +2196,7 @@ app.post('/tabs/open', async (req, res) => {
1730
2196
  const page = await session.context.newPage();
1731
2197
  const tabId = crypto.randomUUID();
1732
2198
  const tabState = createTabState(page);
2199
+ attachDownloadListener(tabState, tabId, log);
1733
2200
  group.set(tabId, tabState);
1734
2201
 
1735
2202
  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
@@ -1745,7 +2212,7 @@ app.post('/tabs/open', async (req, res) => {
1745
2212
  });
1746
2213
  } catch (err) {
1747
2214
  log('error', 'openclaw tab open failed', { reqId: req.reqId, error: err.message });
1748
- res.status(500).json({ error: safeError(err) });
2215
+ handleRouteError(err, req, res);
1749
2216
  }
1750
2217
  });
1751
2218
 
@@ -1770,6 +2237,11 @@ app.post('/stop', async (req, res) => {
1770
2237
  await browser.close().catch(() => {});
1771
2238
  browser = null;
1772
2239
  }
2240
+ const cleanupTasks = [];
2241
+ for (const session of sessions.values()) {
2242
+ cleanupTasks.push(clearSessionDownloads(session));
2243
+ }
2244
+ await Promise.all(cleanupTasks);
1773
2245
  sessions.clear();
1774
2246
  res.json({ ok: true, stopped: true, profile: 'camoufox' });
1775
2247
  } catch (err) {
@@ -1798,19 +2270,27 @@ app.post('/navigate', async (req, res) => {
1798
2270
  }
1799
2271
 
1800
2272
  const { tabState } = found;
1801
- tabState.toolCalls++;
2273
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1802
2274
 
1803
- const result = await withTimeout(withTabLock(targetId, async () => {
2275
+ const result = await withTabLock(targetId, async () => {
1804
2276
  await tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
1805
2277
  tabState.visitedUrls.add(url);
2278
+ tabState.lastSnapshot = null;
2279
+
2280
+ // Google SERP: defer extraction to snapshot call
2281
+ if (isGoogleSerp(tabState.page.url())) {
2282
+ tabState.refs = new Map();
2283
+ return { ok: true, targetId, url: tabState.page.url(), googleSerp: true };
2284
+ }
2285
+
1806
2286
  tabState.refs = await buildRefs(tabState.page);
1807
2287
  return { ok: true, targetId, url: tabState.page.url() };
1808
- }), HANDLER_TIMEOUT_MS, 'openclaw-navigate');
2288
+ });
1809
2289
 
1810
2290
  res.json(result);
1811
2291
  } catch (err) {
1812
2292
  log('error', 'openclaw navigate failed', { reqId: req.reqId, error: err.message });
1813
- res.status(500).json({ error: safeError(err) });
2293
+ handleRouteError(err, req, res);
1814
2294
  }
1815
2295
  });
1816
2296
 
@@ -1830,7 +2310,7 @@ app.get('/snapshot', async (req, res) => {
1830
2310
  }
1831
2311
 
1832
2312
  const { tabState } = found;
1833
- tabState.toolCalls++;
2313
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1834
2314
 
1835
2315
  // Cached chunk retrieval
1836
2316
  if (offset > 0 && tabState.lastSnapshot) {
@@ -1843,6 +2323,28 @@ app.get('/snapshot', async (req, res) => {
1843
2323
  return res.json(response);
1844
2324
  }
1845
2325
 
2326
+ const pageUrl = tabState.page.url();
2327
+
2328
+ // Google SERP fast path
2329
+ if (isGoogleSerp(pageUrl)) {
2330
+ const { refs: googleRefs, snapshot: googleSnapshot } = await extractGoogleSerp(tabState.page);
2331
+ tabState.refs = googleRefs;
2332
+ tabState.lastSnapshot = googleSnapshot;
2333
+ const annotatedYaml = googleSnapshot;
2334
+ const win = windowSnapshot(annotatedYaml, 0);
2335
+ const response = {
2336
+ ok: true, format: 'aria', targetId, url: pageUrl,
2337
+ snapshot: win.text, refsCount: tabState.refs.size,
2338
+ truncated: win.truncated, totalChars: win.totalChars,
2339
+ hasMore: win.hasMore, nextOffset: win.nextOffset,
2340
+ };
2341
+ if (req.query.includeScreenshot === 'true') {
2342
+ const pngBuffer = await tabState.page.screenshot({ type: 'png' });
2343
+ response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
2344
+ }
2345
+ return res.json(response);
2346
+ }
2347
+
1846
2348
  tabState.refs = await buildRefs(tabState.page);
1847
2349
 
1848
2350
  const ariaYaml = await getAriaSnapshot(tabState.page);
@@ -1895,7 +2397,7 @@ app.get('/snapshot', async (req, res) => {
1895
2397
  res.json(response);
1896
2398
  } catch (err) {
1897
2399
  log('error', 'openclaw snapshot failed', { reqId: req.reqId, error: err.message });
1898
- res.status(500).json({ error: safeError(err) });
2400
+ handleRouteError(err, req, res);
1899
2401
  }
1900
2402
  });
1901
2403
 
@@ -1919,9 +2421,9 @@ app.post('/act', async (req, res) => {
1919
2421
  }
1920
2422
 
1921
2423
  const { tabState } = found;
1922
- tabState.toolCalls++;
2424
+ tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
1923
2425
 
1924
- const result = await withTimeout(withTabLock(targetId, async () => {
2426
+ const result = await withTabLock(targetId, async () => {
1925
2427
  switch (kind) {
1926
2428
  case 'click': {
1927
2429
  const { ref, selector, doubleClick } = params;
@@ -1931,7 +2433,7 @@ app.post('/act', async (req, res) => {
1931
2433
 
1932
2434
  const doClick = async (locatorOrSelector, isLocator) => {
1933
2435
  const locator = isLocator ? locatorOrSelector : tabState.page.locator(locatorOrSelector);
1934
- const clickOpts = { timeout: 5000 };
2436
+ const clickOpts = { timeout: 3000 };
1935
2437
  if (doubleClick) clickOpts.clickCount = 2;
1936
2438
 
1937
2439
  try {
@@ -1946,8 +2448,13 @@ app.post('/act', async (req, res) => {
1946
2448
  };
1947
2449
 
1948
2450
  if (ref) {
1949
- const locator = refToLocator(tabState.page, ref, tabState.refs);
1950
- if (!locator) throw new Error(`Unknown ref: ${ref}`);
2451
+ let locator = refToLocator(tabState.page, ref, tabState.refs);
2452
+ if (!locator) {
2453
+ log('info', 'auto-refreshing refs before click (openclaw)', { ref, hadRefs: tabState.refs.size });
2454
+ tabState.refs = await buildRefs(tabState.page);
2455
+ locator = refToLocator(tabState.page, ref, tabState.refs);
2456
+ }
2457
+ if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
1951
2458
  await doClick(locator, true);
1952
2459
  } else {
1953
2460
  await doClick(selector, false);
@@ -1968,8 +2475,13 @@ app.post('/act', async (req, res) => {
1968
2475
  }
1969
2476
 
1970
2477
  if (ref) {
1971
- const locator = refToLocator(tabState.page, ref, tabState.refs);
1972
- if (!locator) throw new Error(`Unknown ref: ${ref}`);
2478
+ let locator = refToLocator(tabState.page, ref, tabState.refs);
2479
+ if (!locator) {
2480
+ log('info', 'auto-refreshing refs before type (openclaw)', { ref, hadRefs: tabState.refs.size });
2481
+ tabState.refs = await buildRefs(tabState.page);
2482
+ locator = refToLocator(tabState.page, ref, tabState.refs);
2483
+ }
2484
+ if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
1973
2485
  await locator.fill(text, { timeout: 10000 });
1974
2486
  if (submit) await tabState.page.keyboard.press('Enter');
1975
2487
  } else {
@@ -1990,8 +2502,12 @@ app.post('/act', async (req, res) => {
1990
2502
  case 'scrollIntoView': {
1991
2503
  const { ref, direction = 'down', amount = 500 } = params;
1992
2504
  if (ref) {
1993
- const locator = refToLocator(tabState.page, ref, tabState.refs);
1994
- if (!locator) throw new Error(`Unknown ref: ${ref}`);
2505
+ let locator = refToLocator(tabState.page, ref, tabState.refs);
2506
+ if (!locator) {
2507
+ tabState.refs = await buildRefs(tabState.page);
2508
+ locator = refToLocator(tabState.page, ref, tabState.refs);
2509
+ }
2510
+ if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
1995
2511
  await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
1996
2512
  } else {
1997
2513
  const delta = direction === 'up' ? -amount : amount;
@@ -2006,8 +2522,12 @@ app.post('/act', async (req, res) => {
2006
2522
  if (!ref && !selector) throw new Error('ref or selector required');
2007
2523
 
2008
2524
  if (ref) {
2009
- const locator = refToLocator(tabState.page, ref, tabState.refs);
2010
- if (!locator) throw new Error(`Unknown ref: ${ref}`);
2525
+ let locator = refToLocator(tabState.page, ref, tabState.refs);
2526
+ if (!locator) {
2527
+ tabState.refs = await buildRefs(tabState.page);
2528
+ locator = refToLocator(tabState.page, ref, tabState.refs);
2529
+ }
2530
+ if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
2011
2531
  await locator.hover({ timeout: 5000 });
2012
2532
  } else {
2013
2533
  await tabState.page.locator(selector).hover({ timeout: 5000 });
@@ -2030,19 +2550,19 @@ app.post('/act', async (req, res) => {
2030
2550
  case 'close': {
2031
2551
  await safePageClose(tabState.page);
2032
2552
  found.group.delete(targetId);
2033
- tabLocks.delete(targetId);
2553
+ { const _l = tabLocks.get(targetId); if (_l) _l.drain(); tabLocks.delete(targetId); }
2034
2554
  return { ok: true, targetId };
2035
2555
  }
2036
2556
 
2037
2557
  default:
2038
2558
  throw new Error(`Unsupported action kind: ${kind}`);
2039
2559
  }
2040
- }), HANDLER_TIMEOUT_MS, 'act');
2560
+ });
2041
2561
 
2042
2562
  res.json(result);
2043
2563
  } catch (err) {
2044
2564
  log('error', 'act failed', { reqId: req.reqId, kind: req.body?.kind, error: err.message });
2045
- res.status(500).json({ error: safeError(err) });
2565
+ handleRouteError(err, req, res);
2046
2566
  }
2047
2567
  });
2048
2568
 
@@ -2068,14 +2588,20 @@ setInterval(() => {
2068
2588
  // Active health probe — detect hung browser even when isConnected() lies
2069
2589
  setInterval(async () => {
2070
2590
  if (!browser || healthState.isRecovering) return;
2071
- // Skip probe if operations are in flight
2072
- if (healthState.activeOps > 0) {
2591
+ const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
2592
+ // Skip probe if operations are in flight AND last success was recent.
2593
+ // If it's been >120s since any successful operation, probe anyway —
2594
+ // active ops are likely stuck on a frozen browser and will time out eventually.
2595
+ if (healthState.activeOps > 0 && timeSinceSuccess < 120000) {
2073
2596
  log('info', 'health probe skipped, operations active', { activeOps: healthState.activeOps });
2074
2597
  return;
2075
2598
  }
2076
- const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
2077
2599
  if (timeSinceSuccess < 120000) return;
2078
2600
 
2601
+ if (healthState.activeOps > 0) {
2602
+ log('warn', 'health probe forced despite active ops', { activeOps: healthState.activeOps, timeSinceSuccessMs: timeSinceSuccess });
2603
+ }
2604
+
2079
2605
  let testContext;
2080
2606
  try {
2081
2607
  testContext = await browser.newContext();
@@ -2127,9 +2653,16 @@ process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
2127
2653
  process.on('SIGINT', () => gracefulShutdown('SIGINT'));
2128
2654
 
2129
2655
  const PORT = CONFIG.port;
2130
- const server = app.listen(PORT, () => {
2656
+ const server = app.listen(PORT, async () => {
2131
2657
  log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
2132
- // Browser launches lazily on first request (saves ~550MB when idle)
2658
+ // Pre-warm browser so first request doesn't eat a 6-7s cold start
2659
+ try {
2660
+ const start = Date.now();
2661
+ await ensureBrowser();
2662
+ log('info', 'browser pre-warmed', { ms: Date.now() - start });
2663
+ } catch (err) {
2664
+ log('error', 'browser pre-warm failed (will retry on first request)', { error: err.message });
2665
+ }
2133
2666
  });
2134
2667
 
2135
2668
  server.on('error', (err) => {