@askjo/camofox-browser 1.1.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -5,6 +5,7 @@ const crypto = require('crypto');
5
5
  const os = require('os');
6
6
  const { expandMacro } = require('./lib/macros');
7
7
  const { loadConfig } = require('./lib/config');
8
+ const { windowSnapshot } = require('./lib/snapshot');
8
9
 
9
10
  const CONFIG = loadConfig();
10
11
 
@@ -46,6 +47,19 @@ app.use((req, res, next) => {
46
47
 
47
48
  const ALLOWED_URL_SCHEMES = ['http:', 'https:'];
48
49
 
50
// ARIA roles that are treated as interactive. 'combobox' is deliberately left
// out: activating one can open complex widgets (date pickers, dropdowns) that
// interfere with navigation. Frozen so no caller can mutate the shared list.
const INTERACTIVE_ROLES = Object.freeze([
  'button', 'link', 'textbox', 'checkbox', 'radio',
  'menuitem', 'tab', 'searchbox', 'slider', 'spinbutton', 'switch',
]);

// Name patterns to skip (date pickers, calendar widgets).
// A separate /datepicker/i entry is unnecessary: /date/i and /picker/i already
// match every string it would match.
const SKIP_PATTERNS = Object.freeze([/date/i, /calendar/i, /picker/i]);
62
+
49
63
  function timingSafeCompare(a, b) {
50
64
  if (typeof a !== 'string' || typeof b !== 'string') return false;
51
65
  const bufA = Buffer.from(a);
@@ -158,10 +172,18 @@ let browser = null;
158
172
  // Note: sessionKey was previously called listItemId - both are accepted for backward compatibility
159
173
  const sessions = new Map();
160
174
 
161
- const SESSION_TIMEOUT_MS = 30 * 60 * 1000; // 30 min
175
+ const SESSION_TIMEOUT_MS = parseInt(process.env.SESSION_TIMEOUT_MS) || 1800000; // 30 min
162
176
  const MAX_SNAPSHOT_NODES = 500;
163
- const MAX_SESSIONS = 50;
164
- const MAX_TABS_PER_SESSION = 10;
177
+ const MAX_SESSIONS = parseInt(process.env.MAX_SESSIONS) || 50;
178
+ const MAX_TABS_PER_SESSION = parseInt(process.env.MAX_TABS_PER_SESSION) || 10;
179
+ const MAX_TABS_GLOBAL = parseInt(process.env.MAX_TABS_GLOBAL) || 10;
180
+ const HANDLER_TIMEOUT_MS = parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000;
181
+ const MAX_CONCURRENT_PER_USER = parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3;
182
+ const PAGE_CLOSE_TIMEOUT_MS = 5000;
183
+ const NAVIGATE_TIMEOUT_MS = parseInt(process.env.NAVIGATE_TIMEOUT_MS) || 25000;
184
+ const BUILDREFS_TIMEOUT_MS = parseInt(process.env.BUILDREFS_TIMEOUT_MS) || 12000;
185
+ const FAILURE_THRESHOLD = 3;
186
+ const TAB_LOCK_TIMEOUT_MS = 30000;
165
187
 
166
188
  // Per-tab locks to serialize operations on the same tab
167
189
  // tabId -> Promise (the currently executing operation)
@@ -172,9 +194,14 @@ async function withTabLock(tabId, operation) {
172
194
  const pending = tabLocks.get(tabId);
173
195
  if (pending) {
174
196
  try {
175
- await pending;
197
+ await Promise.race([
198
+ pending,
199
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Tab lock timeout')), TAB_LOCK_TIMEOUT_MS))
200
+ ]);
176
201
  } catch (e) {
177
- // Previous operation failed, continue anyway
202
+ if (e.message === 'Tab lock timeout') {
203
+ log('warn', 'tab lock timeout, proceeding', { tabId });
204
+ }
178
205
  }
179
206
  }
180
207
 
@@ -192,6 +219,60 @@ async function withTabLock(tabId, operation) {
192
219
  }
193
220
  }
194
221
 
222
// Race `promise` against a deadline of `ms` milliseconds.
// Resolves/rejects with the outcome of `promise` if it settles first; otherwise
// rejects with Error(`${label} timed out after ${ms}ms`).
// The deadline timer is cleared as soon as the race settles so that a finished
// operation does not leave a live timer behind for the rest of `ms`.
// Note: the underlying operation is not cancelled when the deadline wins.
function withTimeout(promise, ms, label) {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms);
  });
  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer));
}
230
+
231
// Normalized userId -> { active: number, queue: Array<() => void> }
// `active` counts slots currently held; `queue` holds wake-up callbacks of
// callers waiting for a slot, in arrival order.
const userConcurrency = new Map();

// Run `operation` while holding one of MAX_CONCURRENT_PER_USER slots for the
// given user. Callers that find all slots taken wait (up to 30s) in FIFO order
// and reject with 'User concurrency limit reached, try again' on timeout.
// Returns whatever `operation` resolves to; its rejection is propagated.
//
// Slots are handed over directly: a finishing operation passes its slot to the
// next waiter without decrementing `active`. This keeps the per-user state in
// the map while a woken waiter is still about to start, so a concurrent new
// arrival cannot create a fresh state object and slip past the limit.
async function withUserLimit(userId, operation) {
  const key = normalizeUserId(userId);
  let state = userConcurrency.get(key);
  if (!state) {
    state = { active: 0, queue: [] };
    userConcurrency.set(key, state);
  }

  if (state.active >= MAX_CONCURRENT_PER_USER) {
    await new Promise((resolve, reject) => {
      const wake = () => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        // Drop the dead waiter so a later release is not wasted on it.
        const idx = state.queue.indexOf(wake);
        if (idx !== -1) state.queue.splice(idx, 1);
        reject(new Error('User concurrency limit reached, try again'));
      }, 30000);
      state.queue.push(wake);
    });
    // The releasing operation transferred its slot: `active` already counts us.
  } else {
    state.active++;
  }

  healthState.activeOps++;
  try {
    const result = await operation();
    healthState.lastSuccessfulNav = Date.now();
    return result;
  } finally {
    healthState.activeOps--;
    const next = state.queue.shift();
    if (next) {
      next(); // hand the slot over; `active` stays unchanged
    } else {
      state.active--;
      if (state.active === 0) userConcurrency.delete(key);
    }
  }
}
264
+
265
// Close `page` without ever throwing and without waiting longer than
// PAGE_CLOSE_TIMEOUT_MS. A close failure is logged at 'warn' level; a close
// that outlasts the timeout is simply abandoned.
// The fallback timer is cleared once the race settles so a fast close does not
// leave a pending timer behind.
async function safePageClose(page) {
  let timer;
  try {
    await Promise.race([
      page.close(),
      new Promise((resolve) => {
        timer = setTimeout(resolve, PAGE_CLOSE_TIMEOUT_MS);
      }),
    ]);
  } catch (e) {
    log('warn', 'page close failed', { error: e.message });
  } finally {
    clearTimeout(timer);
  }
}
275
+
195
276
  // Detect host OS for fingerprint generation
196
277
  function getHostOS() {
197
278
  const platform = os.platform();
@@ -216,26 +297,123 @@ function buildProxyConfig() {
216
297
  };
217
298
  }
218
299
 
300
// How long the browser may sit with zero sessions before it is shut down.
// Read from the BROWSER_IDLE_TIMEOUT_MS environment variable as a base-10
// integer; anything missing, non-numeric, zero or negative falls back to 5 min.
const BROWSER_IDLE_TIMEOUT_MS = (() => {
  const parsed = Number.parseInt(process.env.BROWSER_IDLE_TIMEOUT_MS, 10);
  return parsed > 0 ? parsed : 300000; // 5 min
})();
// Handle of the pending idle-shutdown timer, or null when none is armed.
let browserIdleTimer = null;
// In-flight browser launch, or null when no launch is in progress.
let browserLaunchPromise = null;
303
+
304
// (Re)arm the idle-shutdown timer. Any previously armed timer is cancelled
// first. A new timer is only armed when a browser exists and there are no
// sessions; when it fires it re-checks both conditions before closing.
function scheduleBrowserIdleShutdown() {
  clearBrowserIdleTimer();
  if (sessions.size !== 0 || !browser) return;

  const shutDownIfStillIdle = async () => {
    if (sessions.size !== 0 || !browser) return;
    log('info', 'browser idle shutdown (no sessions)');
    // Detach the global reference before the async close so later callers
    // see "no browser" rather than one that is mid-shutdown.
    const idleBrowser = browser;
    browser = null;
    await idleBrowser.close().catch(() => {});
  };

  browserIdleTimer = setTimeout(shutDownIfStillIdle, BROWSER_IDLE_TIMEOUT_MS);
}
317
+
318
// Cancel the pending idle-shutdown timer, if one is armed, and forget it.
function clearBrowserIdleTimer() {
  if (!browserIdleTimer) return;
  clearTimeout(browserIdleTimer);
  browserIdleTimer = null;
}
324
+
325
// --- Browser health tracking ---
// Mutable counters shared by the navigation, recovery and health-check code.
const healthState = {
  consecutiveNavFailures: 0,
  lastSuccessfulNav: Date.now(),
  isRecovering: false,
  activeOps: 0,
};

// Reset the failure streak and stamp the time of the latest success.
function recordNavSuccess() {
  Object.assign(healthState, {
    consecutiveNavFailures: 0,
    lastSuccessfulNav: Date.now(),
  });
}

// Count one more consecutive failure.
// Returns true once the streak has reached FAILURE_THRESHOLD.
function recordNavFailure() {
  healthState.consecutiveNavFailures += 1;
  return healthState.consecutiveNavFailures >= FAILURE_THRESHOLD;
}
342
+
343
// Tear down every session and the browser, then launch a fresh browser.
// Does nothing if a restart is already in progress. Never throws: a failed
// restart is logged and the recovering flag is always cleared.
async function restartBrowser(reason) {
  if (healthState.isRecovering) return;

  healthState.isRecovering = true;
  log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });

  try {
    // Best-effort close of every session context, one at a time.
    for (const session of sessions.values()) {
      await session.context.close().catch(() => {});
    }
    sessions.clear();

    if (browser) {
      await browser.close().catch(() => {});
      browser = null;
    }

    // Discard any in-flight launch so ensureBrowser() starts a new one.
    browserLaunchPromise = null;
    await ensureBrowser();

    healthState.consecutiveNavFailures = 0;
    healthState.lastSuccessfulNav = Date.now();
    log('info', 'browser restarted successfully');
  } catch (err) {
    log('error', 'browser restart failed', { error: err.message });
  } finally {
    healthState.isRecovering = false;
  }
}
367
+
368
// Total number of tabs across every tab group of every session.
function getTotalTabCount() {
  const tabsInSession = (session) =>
    [...session.tabGroups.values()].reduce((count, group) => count + group.size, 0);

  return [...sessions.values()].reduce((count, session) => count + tabsInSession(session), 0);
}
377
+
378
// Launch a new headless Camoufox (Firefox) instance, store it in the
// module-level `browser` variable and return it. GeoIP is enabled exactly
// when a proxy configuration is present.
async function launchBrowserInstance() {
  const hostOS = getHostOS();
  const proxy = buildProxyConfig();
  const geoip = !!proxy;

  log('info', 'launching camoufox', { hostOS, geoip });

  const options = await launchOptions({
    headless: true,
    os: hostOS,
    humanize: true,
    enable_cache: true,
    proxy,
    geoip,
  });

  browser = await firefox.launch(options);
  log('info', 'camoufox launched');
  return browser;
}
397
+
219
398
  async function ensureBrowser() {
220
- if (!browser) {
221
- const hostOS = getHostOS();
222
- const proxy = buildProxyConfig();
223
-
224
- log('info', 'launching camoufox', { hostOS, geoip: !!proxy });
225
-
226
- const options = await launchOptions({
227
- headless: true,
228
- os: hostOS,
229
- humanize: true,
230
- enable_cache: true,
231
- proxy: proxy,
232
- geoip: !!proxy,
399
+ clearBrowserIdleTimer();
400
+ if (browser && !browser.isConnected()) {
401
+ log('warn', 'browser disconnected, clearing dead sessions and relaunching', {
402
+ deadSessions: sessions.size,
233
403
  });
234
-
235
- browser = await firefox.launch(options);
236
- log('info', 'camoufox launched');
404
+ for (const [userId, session] of sessions) {
405
+ await session.context.close().catch(() => {});
406
+ }
407
+ sessions.clear();
408
+ browser = null;
237
409
  }
238
- return browser;
410
+ if (browser) return browser;
411
+ if (browserLaunchPromise) return browserLaunchPromise;
412
+ browserLaunchPromise = Promise.race([
413
+ launchBrowserInstance(),
414
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Browser launch timeout (30s)')), 30000)),
415
+ ]).finally(() => { browserLaunchPromise = null; });
416
+ return browserLaunchPromise;
239
417
  }
240
418
 
241
419
  // Helper to normalize userId to string (JSON body may parse as number)
@@ -296,7 +474,8 @@ function createTabState(page) {
296
474
  page,
297
475
  refs: new Map(),
298
476
  visitedUrls: new Set(),
299
- toolCalls: 0
477
+ toolCalls: 0,
478
+ lastSnapshot: null,
300
479
  };
301
480
  }
302
481
 
@@ -397,40 +576,53 @@ async function buildRefs(page) {
397
576
  return refs;
398
577
  }
399
578
 
579
+ const start = Date.now();
580
+
581
+ // Hard total timeout on the entire buildRefs operation
582
+ const timeoutPromise = new Promise((_, reject) =>
583
+ setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS)
584
+ );
585
+
586
+ try {
587
+ return await Promise.race([
588
+ _buildRefsInner(page, refs, start),
589
+ timeoutPromise
590
+ ]);
591
+ } catch (err) {
592
+ if (err.message === 'buildRefs_timeout') {
593
+ log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
594
+ return refs;
595
+ }
596
+ throw err;
597
+ }
598
+ }
599
+
600
+ async function _buildRefsInner(page, refs, start) {
400
601
  await waitForPageReady(page, { waitForNetwork: false });
401
602
 
402
- // Get ARIA snapshot including shadow DOM content
403
- // Playwright's ariaSnapshot already traverses shadow roots, but we also
404
- // inject a script to collect shadow DOM elements for additional coverage
603
+ // Budget remaining time for ariaSnapshot
604
+ const elapsed = Date.now() - start;
605
+ const remaining = BUILDREFS_TIMEOUT_MS - elapsed;
606
+ if (remaining < 2000) {
607
+ log('warn', 'buildRefs: insufficient time for ariaSnapshot', { elapsed });
608
+ return refs;
609
+ }
610
+
405
611
  let ariaYaml;
406
612
  try {
407
- ariaYaml = await page.locator('body').ariaSnapshot({ timeout: 10000 });
613
+ ariaYaml = await page.locator('body').ariaSnapshot({ timeout: Math.min(remaining - 1000, 5000) });
408
614
  } catch (err) {
409
615
  log('warn', 'ariaSnapshot failed, retrying');
410
- await page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
411
- ariaYaml = await page.locator('body').ariaSnapshot({ timeout: 10000 });
616
+ const retryBudget = BUILDREFS_TIMEOUT_MS - (Date.now() - start);
617
+ if (retryBudget < 2000) return refs;
618
+ try {
619
+ ariaYaml = await page.locator('body').ariaSnapshot({ timeout: Math.min(retryBudget - 500, 5000) });
620
+ } catch (retryErr) {
621
+ log('warn', 'ariaSnapshot retry failed, returning empty refs', { error: retryErr.message });
622
+ return refs;
623
+ }
412
624
  }
413
625
 
414
- // Collect additional interactive elements from shadow DOM
415
- const shadowElements = await page.evaluate(() => {
416
- const elements = [];
417
- const collectFromShadow = (root, depth = 0) => {
418
- if (depth > 5) return; // Limit recursion
419
- const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
420
- while (walker.nextNode()) {
421
- const el = walker.currentNode;
422
- if (el.shadowRoot) {
423
- collectFromShadow(el.shadowRoot, depth + 1);
424
- }
425
- }
426
- };
427
- // Start collection from all shadow roots
428
- document.querySelectorAll('*').forEach(el => {
429
- if (el.shadowRoot) collectFromShadow(el.shadowRoot);
430
- });
431
- return elements;
432
- }).catch(() => []);
433
-
434
626
  if (!ariaYaml) {
435
627
  log('warn', 'buildRefs: no aria snapshot');
436
628
  return refs;
@@ -439,19 +631,6 @@ async function buildRefs(page) {
439
631
  const lines = ariaYaml.split('\n');
440
632
  let refCounter = 1;
441
633
 
442
- // Interactive roles to include - exclude combobox to avoid opening complex widgets
443
- // (date pickers, dropdowns) that can interfere with navigation
444
- const interactiveRoles = [
445
- 'button', 'link', 'textbox', 'checkbox', 'radio',
446
- 'menuitem', 'tab', 'searchbox', 'slider', 'spinbutton', 'switch'
447
- // 'combobox' excluded - can trigger date pickers and complex dropdowns
448
- ];
449
-
450
- // Patterns to skip (date pickers, calendar widgets)
451
- const skipPatterns = [
452
- /date/i, /calendar/i, /picker/i, /datepicker/i
453
- ];
454
-
455
634
  // Track occurrences of each role+name combo for nth disambiguation
456
635
  const seenCounts = new Map(); // "role:name" -> count
457
636
 
@@ -463,13 +642,11 @@ async function buildRefs(page) {
463
642
  const [, role, name] = match;
464
643
  const normalizedRole = role.toLowerCase();
465
644
 
466
- // Skip combobox role entirely (date pickers, complex dropdowns)
467
645
  if (normalizedRole === 'combobox') continue;
468
646
 
469
- // Skip elements with date/calendar-related names
470
- if (name && skipPatterns.some(p => p.test(name))) continue;
647
+ if (name && SKIP_PATTERNS.some(p => p.test(name))) continue;
471
648
 
472
- if (interactiveRoles.includes(normalizedRole)) {
649
+ if (INTERACTIVE_ROLES.includes(normalizedRole)) {
473
650
  const normalizedName = name || '';
474
651
  const key = `${normalizedRole}:${normalizedName}`;
475
652
 
@@ -491,7 +668,12 @@ async function getAriaSnapshot(page) {
491
668
  return null;
492
669
  }
493
670
  await waitForPageReady(page, { waitForNetwork: false });
494
- return await page.locator('body').ariaSnapshot({ timeout: 10000 });
671
+ try {
672
+ return await page.locator('body').ariaSnapshot({ timeout: 5000 });
673
+ } catch (err) {
674
+ log('warn', 'getAriaSnapshot failed', { error: err.message });
675
+ return null;
676
+ }
495
677
  }
496
678
 
497
679
  function refToLocator(page, ref, refs) {
@@ -508,18 +690,315 @@ function refToLocator(page, ref, refs) {
508
690
  return locator;
509
691
  }
510
692
 
511
- // Health check
512
- app.get('/health', async (req, res) => {
693
+ // --- YouTube transcript extraction via yt-dlp ---
694
+ // POST /youtube/transcript { url, languages? }
695
+ // Uses yt-dlp to extract subtitles — no browser needed, no ads, no playback.
696
+ // yt-dlp handles YouTube's signed caption URLs correctly.
697
+ // Falls back to Camoufox page intercept if yt-dlp is not installed.
698
+
699
+ const { execFile } = require('child_process');
700
+ const { mkdtemp, readFile, readdir, rm } = require('fs/promises');
701
+ const { tmpdir } = require('os');
702
+ const { join } = require('path');
703
+
704
// Detect yt-dlp binary at startup.
// Stays null until (and unless) one of the candidates answers `--version`;
// detection runs in the background and is not awaited by module load.
let ytDlpPath = null;
(async () => {
  // Resolves with the trimmed `--version` output, rejects if the binary
  // cannot be run within 5 seconds.
  const probeVersion = (binary) => new Promise((resolve, reject) => {
    execFile(binary, ['--version'], { timeout: 5000 }, (err, stdout) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout.trim());
    });
  });

  const candidates = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
  for (const candidate of candidates) {
    try {
      await probeVersion(candidate);
      ytDlpPath = candidate;
      log('info', 'yt-dlp found', { path: candidate });
      break;
    } catch {
      // Candidate not runnable — fall through to the next one.
    }
  }

  if (!ytDlpPath) {
    log('warn', 'yt-dlp not found — YouTube transcript endpoint will use browser fallback');
  }
})();
722
+
723
+ app.post('/youtube/transcript', async (req, res) => {
724
+ const reqId = req.reqId;
513
725
  try {
514
- const b = await ensureBrowser();
515
- res.json({
516
- ok: true,
517
- engine: 'camoufox',
518
- browserConnected: b.isConnected()
519
- });
726
+ const { url, languages = ['en'] } = req.body;
727
+ if (!url) return res.status(400).json({ error: 'url is required' });
728
+
729
+ const urlErr = validateUrl(url);
730
+ if (urlErr) return res.status(400).json({ error: urlErr });
731
+
732
+ const videoIdMatch = url.match(
733
+ /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/shorts\/)([a-zA-Z0-9_-]{11})/
734
+ );
735
+ if (!videoIdMatch) {
736
+ return res.status(400).json({ error: 'Could not extract YouTube video ID from URL' });
737
+ }
738
+ const videoId = videoIdMatch[1];
739
+ const lang = languages[0] || 'en';
740
+
741
+ log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: ytDlpPath ? 'yt-dlp' : 'browser' });
742
+
743
+ let result;
744
+ if (ytDlpPath) {
745
+ result = await ytDlpTranscript(reqId, url, videoId, lang);
746
+ } else {
747
+ result = await browserTranscript(reqId, url, videoId, lang);
748
+ }
749
+
750
+ log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
751
+ res.json(result);
520
752
  } catch (err) {
521
- res.status(500).json({ ok: false, error: safeError(err) });
753
+ log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
754
+ res.status(500).json({ error: safeError(err) });
755
+ }
756
+ });
757
+
758
// Strategy 1: yt-dlp (preferred — fast, no browser, no ads)

// Subtitle file extensions this module can parse, most preferred first.
const YT_DLP_SUBTITLE_EXTENSIONS = ['.json3', '.vtt', '.srv3'];

// Run the detected yt-dlp binary with `args` and resolve with its stdout.
// On failure, rejects with an Error whose message is `describeFailure(err, stderr)`.
function runYtDlp(args, timeoutMs, describeFailure) {
  return new Promise((resolve, reject) => {
    execFile(ytDlpPath, args, { timeout: timeoutMs }, (err, stdout, stderr) => {
      if (err) {
        reject(new Error(describeFailure(err, stderr)));
        return;
      }
      resolve(stdout);
    });
  });
}

// Fetch the title and subtitles of `url` with yt-dlp and turn them into a
// transcript result object.
//   reqId   - request id (accepted for signature parity; not used here)
//   url     - video URL handed to yt-dlp
//   videoId - video id echoed back in the result
//   lang    - subtitle language requested via --sub-lang
// Resolves with { status: 'ok', transcript, ... } on success or
// { status: 'error', code: 404, ... } when no usable captions were produced.
// Rejects if either yt-dlp invocation fails. The temp directory is always removed.
async function ytDlpTranscript(reqId, url, videoId, lang) {
  const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
  try {
    // Step 1: Get title via --print (fast, no download).
    // '--' ends option parsing so the URL can never be read as a flag.
    const titleOutput = await runYtDlp(
      ['--skip-download', '--no-warnings', '--print', '%(title)s', '--', url],
      15000,
      (err) => `yt-dlp metadata failed: ${err.message}`,
    );
    const title = titleOutput.trim().split('\n')[0] || '';

    // Step 2: Download subtitles to temp dir
    await runYtDlp(
      [
        '--skip-download',
        '--write-sub', '--write-auto-sub',
        '--sub-lang', lang,
        '--sub-format', 'json3',
        '-o', join(tmpDir, '%(id)s'),
        '--', url,
      ],
      30000,
      (err, stderr) => `yt-dlp subtitle download failed: ${err.message}\n${stderr}`,
    );

    const captionError = (message) => ({
      status: 'error', code: 404,
      message,
      video_url: url, video_id: videoId, title,
    });

    // Pick the subtitle file by format preference rather than directory order,
    // so the result is the same whenever several formats were written.
    const files = await readdir(tmpDir);
    const subFile = YT_DLP_SUBTITLE_EXTENSIONS
      .map((ext) => files.find((f) => f.endsWith(ext)))
      .find(Boolean);
    if (!subFile) return captionError('No captions available for this video');

    const content = await readFile(join(tmpDir, subFile), 'utf8');
    let transcriptText = null;
    if (subFile.endsWith('.json3')) {
      transcriptText = parseJson3(content);
    } else if (subFile.endsWith('.vtt')) {
      transcriptText = parseVtt(content);
    } else {
      transcriptText = parseXml(content);
    }

    if (!transcriptText || !transcriptText.trim()) {
      return captionError('Subtitle file found but content was empty');
    }

    // Detect language from filename (e.g., dQw4w9WgXcQ.en.json3); two- and
    // three-letter codes with an optional suffix (en, fil, pt-BR) are accepted.
    const langMatch = subFile.match(/\.([a-z]{2,3}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);

    return {
      status: 'ok', transcript: transcriptText,
      video_url: url, video_id: videoId, video_title: title,
      language: langMatch?.[1] || lang,
      total_words: transcriptText.split(/\s+/).length,
    };
  } finally {
    await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  }
}
830
+
831
// Strategy 2: Browser fallback — play video, intercept timedtext network response
//
// Opens `url` in a page of the '__yt_transcript__' session, starts muted
// playback and captures the first non-empty /api/timedtext response whose URL
// carries `v=<videoId>`. Resolves with a { status: 'ok', ... } transcript
// object, or a { status: 'error', code: 404, ... } object when no captions
// arrive within the polling window or the payload cannot be parsed.
// The page is always closed before returning.
async function browserTranscript(reqId, url, videoId, lang) {
  const transcriptUser = '__yt_transcript__';

  // Choose a parser from the shape of the raw payload; null if unrecognised.
  const parseCaptions = (raw) => {
    if (raw.trimStart().startsWith('{')) return parseJson3(raw);
    if (raw.includes('WEBVTT')) return parseVtt(raw);
    if (raw.includes('<text')) return parseXml(raw);
    return null;
  };

  return await withUserLimit(transcriptUser, async () => {
    await ensureBrowser();
    const session = await getSession(transcriptUser);
    const page = await session.context.newPage();

    try {
      // Mute audio: patch play() so every media element starts silenced.
      await page.addInitScript(() => {
        const origPlay = HTMLMediaElement.prototype.play;
        HTMLMediaElement.prototype.play = function() { this.volume = 0; this.muted = true; return origPlay.call(this); };
      });

      // Intercept timedtext responses — filter by video ID to skip ad captions.
      // Only the first non-empty body is kept.
      let interceptedCaptions = null;
      page.on('response', async (response) => {
        const respUrl = response.url();
        const isTargetCaptions = respUrl.includes('/api/timedtext') && respUrl.includes(`v=${videoId}`);
        if (!isTargetCaptions || interceptedCaptions) return;
        try {
          const body = await response.text();
          if (body && body.length > 0) interceptedCaptions = body;
        } catch {}
      });

      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATE_TIMEOUT_MS });
      await page.waitForTimeout(2000);

      // Extract metadata from ytInitialPlayerResponse
      const meta = await page.evaluate(() => {
        const r = window.ytInitialPlayerResponse || (typeof ytInitialPlayerResponse !== 'undefined' ? ytInitialPlayerResponse : null);
        if (!r) return { title: '' };
        const tracks = r?.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
        return {
          title: r?.videoDetails?.title || '',
          languages: tracks.map(t => ({ code: t.languageCode, name: t.name?.simpleText || t.languageCode, kind: t.kind || 'manual' })),
        };
      });

      const captionError = (message) => ({
        status: 'error', code: 404,
        message,
        video_url: url, video_id: videoId, title: meta.title,
      });

      // Start playback to trigger caption loading
      await page.evaluate(() => {
        const v = document.querySelector('video');
        if (v) { v.muted = true; v.play().catch(() => {}); }
      }).catch(() => {});

      // Wait up to 20s (40 polls of 500ms) for the target video's captions
      // (may need to sit through an ad).
      let polls = 0;
      while (polls < 40 && !interceptedCaptions) {
        await page.waitForTimeout(500);
        polls += 1;
      }

      if (!interceptedCaptions) {
        return captionError('No captions loaded during playback (video may have no captions, or ad blocked it)');
      }

      log('info', 'youtube transcript: intercepted captions', { reqId, len: interceptedCaptions.length });

      const transcriptText = parseCaptions(interceptedCaptions);
      if (!transcriptText || !transcriptText.trim()) {
        return captionError('Caption data intercepted but could not be parsed');
      }

      return {
        status: 'ok', transcript: transcriptText,
        video_url: url, video_id: videoId, video_title: meta.title,
        language: lang, total_words: transcriptText.split(/\s+/).length,
        available_languages: meta.languages,
      };
    } finally {
      await safePageClose(page);
    }
  });
}
916
+
917
+ // --- YouTube transcript parsers ---
918
+
919
// Convert a YouTube "json3" caption payload into transcript text.
// Each event that carries text becomes one line "[MM:SS] text", where MM is
// total minutes (it may exceed 59) derived from the event's tStartMs.
// Returns the joined lines ('' when there are none), or null when `content`
// is not valid JSON or cannot be walked at all.
function parseJson3(content) {
  try {
    const data = JSON.parse(content);
    const events = data.events || [];
    const lines = [];
    for (const event of events) {
      // Skip malformed entries instead of discarding the whole transcript.
      const segs = event?.segs;
      if (!Array.isArray(segs) || segs.length === 0) continue;
      // Collapse embedded newlines/runs of whitespace so that one caption
      // event always yields exactly one output line.
      const text = segs
        .map((s) => s?.utf8 || '')
        .join('')
        .replace(/\s+/g, ' ')
        .trim();
      if (!text) continue;
      const tsSec = Math.floor((event.tStartMs || 0) / 1000);
      const mm = String(Math.floor(tsSec / 60)).padStart(2, '0');
      const ss = String(tsSec % 60).padStart(2, '0');
      lines.push(`[${mm}:${ss}] ${text}`);
    }
    return lines.join('\n');
  } catch (e) {
    return null;
  }
}
940
+
941
// Decode the character references that can appear in WebVTT cue text:
// the named ones (&amp; &lt; &gt; &quot; &nbsp;) plus decimal and hex numeric
// references. &amp; is decoded first, so doubly-escaped input such as
// "&amp;#39;" is unwrapped twice.
// NOTE(review): the double unwrap is presumably wanted for YouTube payloads — confirm.
function decodeVttEntities(text) {
  // Leave out-of-range numeric references untouched instead of throwing.
  const fromCodePoint = (cp, raw) => (cp <= 0x10ffff ? String.fromCodePoint(cp) : raw);
  return text
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&nbsp;/g, ' ')
    .replace(/&#(\d+);/g, (raw, dec) => fromCodePoint(Number(dec), raw))
    .replace(/&#x([0-9a-f]+);/gi, (raw, hex) => fromCodePoint(Number.parseInt(hex, 16), raw));
}

// Convert WebVTT caption text into transcript lines.
// The first text line of each cue is prefixed with "[<timestamp>]" (formatted
// by formatVttTs from the cue's start time); further lines of the same cue are
// emitted without a prefix. Header lines (WEBVTT, Kind:, Language:), NOTE
// lines, cue identifiers and markup tags are dropped.
// Returns the lines joined with '\n' ('' when nothing remains).
function parseVtt(content) {
  const lines = content.split('\n').map((line) => line.trim());
  const result = [];
  let currentTimestamp = '';

  for (let i = 0; i < lines.length; i++) {
    const stripped = lines[i];
    if (!stripped) continue;
    // The signature line may carry trailing text ("WEBVTT - title").
    if (stripped === 'WEBVTT' || /^WEBVTT[ \t]/.test(stripped)) continue;
    if (stripped.startsWith('Kind:') || stripped.startsWith('Language:') || stripped.startsWith('NOTE')) continue;

    if (stripped.includes(' --> ')) {
      const parts = stripped.split(' --> ');
      if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
      continue;
    }

    // A line that opens a block and sits directly above a timing line is a
    // cue identifier, not caption text.
    const opensBlock = i === 0 || lines[i - 1] === '';
    const aboveTiming = (lines[i + 1] || '').includes(' --> ');
    if (opensBlock && aboveTiming) continue;

    const text = decodeVttEntities(stripped.replace(/<[^>]+>/g, '')).trim();
    if (!text) continue;
    if (currentTimestamp) {
      result.push(`[${currentTimestamp}] ${text}`);
      currentTimestamp = '';
    } else {
      result.push(text);
    }
  }
  return result.join('\n');
}
959
+
960
// Decode character references found in timedtext XML: the named ones
// (&amp; &lt; &gt; &quot;) plus decimal and hex numeric references.
// &amp; is decoded first, so doubly-escaped input such as "&amp;#39;" ends up
// as a plain apostrophe.
// NOTE(review): the double unwrap is presumably wanted for YouTube payloads — confirm.
function decodeXmlCaptionEntities(text) {
  // Leave out-of-range numeric references untouched instead of throwing.
  const fromCodePoint = (cp, raw) => (cp <= 0x10ffff ? String.fromCodePoint(cp) : raw);
  return text
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#(\d+);/g, (raw, dec) => fromCodePoint(Number(dec), raw))
    .replace(/&#x([0-9a-f]+);/gi, (raw, hex) => fromCodePoint(Number.parseInt(hex, 16), raw));
}

// Convert timedtext XML (<text start="…">…</text> elements) into transcript
// text. Every non-empty element becomes one line "[MM:SS] text", where MM is
// total minutes computed from the `start` attribute (seconds).
// Returns the lines joined with '\n' ('' when no element matches).
function parseXml(content) {
  const lines = [];
  const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
  let match;
  while ((match = regex.exec(content)) !== null) {
    const startSec = parseFloat(match[1]) || 0;
    // Strip nested markup, decode entities, and fold embedded newlines so
    // each caption stays on a single output line.
    const text = decodeXmlCaptionEntities(match[2].replace(/<[^>]+>/g, ''))
      .replace(/\s+/g, ' ')
      .trim();
    if (!text) continue;
    const mm = String(Math.floor(startSec / 60)).padStart(2, '0');
    const ss = String(Math.floor(startSec % 60)).padStart(2, '0');
    lines.push(`[${mm}:${ss}] ${text}`);
  }
  return lines.join('\n');
}
974
+
975
// Reduce a WebVTT timestamp ("HH:MM:SS.mmm" or "MM:SS.mmm") to "MM:SS".
// Hours are folded into the minute count (so "01:02:03.500" -> "62:03") and
// fractional seconds are dropped. Unparseable fields count as 0, both fields
// are zero-padded to two digits, and input without any ':' is returned as-is.
function formatVttTs(ts) {
  const parts = ts.split(':');
  if (parts.length < 2) return ts;

  const toInt = (field) => Number.parseInt(field, 10) || 0;
  const pad2 = (n) => String(n).padStart(2, '0');

  let totalMinutes;
  let secondsField;
  if (parts.length >= 3) {
    totalMinutes = toInt(parts[0]) * 60 + toInt(parts[1]);
    secondsField = parts[2];
  } else {
    totalMinutes = toInt(parts[0]);
    secondsField = parts[1];
  }

  // Keep only the whole-second part ("05.250" -> 5).
  const seconds = toInt((secondsField || '00').split('.')[0]);
  return `${pad2(totalMinutes)}:${pad2(seconds)}`;
}
988
+
989
// Health check. Answers 503 while a browser restart is in progress; otherwise
// reports whether a browser exists and is connected, the total tab count and
// the current navigation failure streak. Never launches a browser itself.
app.get('/health', (req, res) => {
  if (healthState.isRecovering) {
    return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
  }

  const running = browser !== null && (browser.isConnected?.() ?? false);
  const report = {
    ok: true,
    engine: 'camoufox',
    browserConnected: running,
    browserRunning: running,
    activeTabs: getTotalTabCount(),
    consecutiveFailures: healthState.consecutiveNavFailures,
  };
  res.json(report);
});
524
1003
 
525
1004
  // Create new tab
@@ -567,39 +1046,81 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
567
1046
  const tabId = req.params.tabId;
568
1047
 
569
1048
  try {
570
- const { userId, url, macro, query } = req.body;
571
- const session = sessions.get(normalizeUserId(userId));
572
- const found = session && findTab(session, tabId);
573
- if (!found) return res.status(404).json({ error: 'Tab not found' });
574
-
575
- const { tabState } = found;
576
- tabState.toolCalls++;
577
-
578
- let targetUrl = url;
579
- if (macro) {
580
- targetUrl = expandMacro(macro, query) || url;
581
- }
582
-
583
- if (!targetUrl) {
584
- return res.status(400).json({ error: 'url or macro required' });
585
- }
586
-
587
- const urlErr = validateUrl(targetUrl);
588
- if (urlErr) return res.status(400).json({ error: urlErr });
589
-
590
- // Serialize navigation operations on the same tab
591
- const result = await withTabLock(tabId, async () => {
592
- await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
593
- tabState.visitedUrls.add(targetUrl);
594
- tabState.refs = await buildRefs(tabState.page);
595
- return { ok: true, url: tabState.page.url() };
596
- });
1049
+ const { userId, url, macro, query, sessionKey, listItemId } = req.body;
1050
+ if (!userId) return res.status(400).json({ error: 'userId required' });
1051
+
1052
+ const result = await withUserLimit(userId, () => withTimeout((async () => {
1053
+ await ensureBrowser();
1054
+ let session = sessions.get(normalizeUserId(userId));
1055
+ let found = session && findTab(session, tabId);
1056
+
1057
+ let tabState;
1058
+ if (!found) {
1059
+ const resolvedSessionKey = sessionKey || listItemId || 'default';
1060
+ session = await getSession(userId);
1061
+ let sessionTabs = 0;
1062
+ for (const g of session.tabGroups.values()) sessionTabs += g.size;
1063
+ if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
1064
+ // Reuse oldest tab in session instead of rejecting
1065
+ let oldestTab = null;
1066
+ let oldestGroup = null;
1067
+ let oldestTabId = null;
1068
+ for (const [gKey, group] of session.tabGroups) {
1069
+ for (const [tid, ts] of group) {
1070
+ if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
1071
+ oldestTab = ts;
1072
+ oldestGroup = group;
1073
+ oldestTabId = tid;
1074
+ }
1075
+ }
1076
+ }
1077
+ if (oldestTab) {
1078
+ tabState = oldestTab;
1079
+ const group = getTabGroup(session, resolvedSessionKey);
1080
+ if (oldestGroup) oldestGroup.delete(oldestTabId);
1081
+ group.set(tabId, tabState);
1082
+ tabLocks.delete(oldestTabId);
1083
+ log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
1084
+ } else {
1085
+ throw new Error('Maximum tabs per session reached');
1086
+ }
1087
+ } else {
1088
+ const page = await session.context.newPage();
1089
+ tabState = createTabState(page);
1090
+ const group = getTabGroup(session, resolvedSessionKey);
1091
+ group.set(tabId, tabState);
1092
+ log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
1093
+ }
1094
+ } else {
1095
+ tabState = found.tabState;
1096
+ }
1097
+ tabState.toolCalls++;
1098
+
1099
+ let targetUrl = url;
1100
+ if (macro) {
1101
+ targetUrl = expandMacro(macro, query) || url;
1102
+ }
1103
+
1104
+ if (!targetUrl) throw new Error('url or macro required');
1105
+
1106
+ const urlErr = validateUrl(targetUrl);
1107
+ if (urlErr) throw new Error(urlErr);
1108
+
1109
+ return await withTabLock(tabId, async () => {
1110
+ await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
1111
+ tabState.visitedUrls.add(targetUrl);
1112
+ tabState.lastSnapshot = null;
1113
+ tabState.refs = await buildRefs(tabState.page);
1114
+ return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
1115
+ });
1116
+ })(), HANDLER_TIMEOUT_MS, 'navigate'));
597
1117
 
598
1118
  log('info', 'navigated', { reqId: req.reqId, tabId, url: result.url });
599
1119
  res.json(result);
600
1120
  } catch (err) {
601
1121
  log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
602
- res.status(500).json({ error: safeError(err) });
1122
+ const status = err.message && err.message.startsWith('Blocked URL scheme') ? 400 : 500;
1123
+ res.status(status).json({ error: safeError(err) });
603
1124
  }
604
1125
  });
605
1126
 
@@ -607,71 +1128,88 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
607
1128
  app.get('/tabs/:tabId/snapshot', async (req, res) => {
608
1129
  try {
609
1130
  const userId = req.query.userId;
1131
+ if (!userId) return res.status(400).json({ error: 'userId required' });
610
1132
  const format = req.query.format || 'text';
1133
+ const offset = parseInt(req.query.offset) || 0;
611
1134
  const session = sessions.get(normalizeUserId(userId));
612
1135
  const found = session && findTab(session, req.params.tabId);
613
1136
  if (!found) return res.status(404).json({ error: 'Tab not found' });
614
1137
 
615
1138
  const { tabState } = found;
616
1139
  tabState.toolCalls++;
617
- tabState.refs = await buildRefs(tabState.page);
618
-
619
- const ariaYaml = await getAriaSnapshot(tabState.page);
620
-
621
- // Annotate YAML with ref IDs for interactive elements
622
- let annotatedYaml = ariaYaml || '';
623
- if (annotatedYaml && tabState.refs.size > 0) {
624
- // Build a map of role+name -> refId for annotation
625
- const refsByKey = new Map();
626
- const seenCounts = new Map();
627
- for (const [refId, info] of tabState.refs) {
628
- const key = `${info.role}:${info.name}:${info.nth}`;
629
- refsByKey.set(key, refId);
1140
+
1141
+ // Cached chunk retrieval for offset>0 requests
1142
+ if (offset > 0 && tabState.lastSnapshot) {
1143
+ const win = windowSnapshot(tabState.lastSnapshot, offset);
1144
+ const response = { url: tabState.page.url(), snapshot: win.text, refsCount: tabState.refs.size, truncated: win.truncated, totalChars: win.totalChars, hasMore: win.hasMore, nextOffset: win.nextOffset };
1145
+ if (req.query.includeScreenshot === 'true') {
1146
+ const pngBuffer = await tabState.page.screenshot({ type: 'png' });
1147
+ response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
630
1148
  }
1149
+ log('info', 'snapshot (cached offset)', { reqId: req.reqId, tabId: req.params.tabId, offset, totalChars: win.totalChars });
1150
+ return res.json(response);
1151
+ }
1152
+
1153
+ const result = await withUserLimit(userId, () => withTimeout((async () => {
1154
+ tabState.refs = await buildRefs(tabState.page);
1155
+ const ariaYaml = await getAriaSnapshot(tabState.page);
631
1156
 
632
- // Track occurrences while annotating
633
- const annotationCounts = new Map();
634
- const lines = annotatedYaml.split('\n');
635
- // Must match buildRefs - excludes combobox to avoid date pickers/complex dropdowns
636
- const interactiveRoles = [
637
- 'button', 'link', 'textbox', 'checkbox', 'radio',
638
- 'menuitem', 'tab', 'searchbox', 'slider', 'spinbutton', 'switch'
639
- ];
640
- const skipPatterns = [/date/i, /calendar/i, /picker/i, /datepicker/i];
641
-
642
- annotatedYaml = lines.map(line => {
643
- const match = line.match(/^(\s*-\s+)(\w+)(\s+"([^"]*)")?(.*)$/);
644
- if (match) {
645
- const [, prefix, role, nameMatch, name, suffix] = match;
646
- const normalizedRole = role.toLowerCase();
647
-
648
- // Skip combobox and date-related elements (same as buildRefs)
649
- if (normalizedRole === 'combobox') return line;
650
- if (name && skipPatterns.some(p => p.test(name))) return line;
651
-
652
- if (interactiveRoles.includes(normalizedRole)) {
653
- const normalizedName = name || '';
654
- const countKey = `${normalizedRole}:${normalizedName}`;
655
- const nth = annotationCounts.get(countKey) || 0;
656
- annotationCounts.set(countKey, nth + 1);
657
-
658
- const key = `${normalizedRole}:${normalizedName}:${nth}`;
659
- const refId = refsByKey.get(key);
660
- if (refId) {
661
- return `${prefix}${role}${nameMatch || ''} [${refId}]${suffix}`;
1157
+ let annotatedYaml = ariaYaml || '';
1158
+ if (annotatedYaml && tabState.refs.size > 0) {
1159
+ const refsByKey = new Map();
1160
+ for (const [refId, info] of tabState.refs) {
1161
+ const key = `${info.role}:${info.name}:${info.nth}`;
1162
+ refsByKey.set(key, refId);
1163
+ }
1164
+
1165
+ const annotationCounts = new Map();
1166
+ const lines = annotatedYaml.split('\n');
1167
+
1168
+ annotatedYaml = lines.map(line => {
1169
+ const match = line.match(/^(\s*-\s+)(\w+)(\s+"([^"]*)")?(.*)$/);
1170
+ if (match) {
1171
+ const [, prefix, role, nameMatch, name, suffix] = match;
1172
+ const normalizedRole = role.toLowerCase();
1173
+ if (normalizedRole === 'combobox') return line;
1174
+ if (name && SKIP_PATTERNS.some(p => p.test(name))) return line;
1175
+ if (INTERACTIVE_ROLES.includes(normalizedRole)) {
1176
+ const normalizedName = name || '';
1177
+ const countKey = `${normalizedRole}:${normalizedName}`;
1178
+ const nth = annotationCounts.get(countKey) || 0;
1179
+ annotationCounts.set(countKey, nth + 1);
1180
+ const key = `${normalizedRole}:${normalizedName}:${nth}`;
1181
+ const refId = refsByKey.get(key);
1182
+ if (refId) {
1183
+ return `${prefix}${role}${nameMatch || ''} [${refId}]${suffix}`;
1184
+ }
662
1185
  }
663
1186
  }
664
- }
665
- return line;
666
- }).join('\n');
667
- }
668
-
669
- const result = {
670
- url: tabState.page.url(),
671
- snapshot: annotatedYaml,
672
- refsCount: tabState.refs.size
673
- };
674
- log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount });
1187
+ return line;
1188
+ }).join('\n');
1189
+ }
1190
+
1191
+ tabState.lastSnapshot = annotatedYaml;
1192
+ const win = windowSnapshot(annotatedYaml, 0);
1193
+
1194
+ const response = {
1195
+ url: tabState.page.url(),
1196
+ snapshot: win.text,
1197
+ refsCount: tabState.refs.size,
1198
+ truncated: win.truncated,
1199
+ totalChars: win.totalChars,
1200
+ hasMore: win.hasMore,
1201
+ nextOffset: win.nextOffset,
1202
+ };
1203
+
1204
+ if (req.query.includeScreenshot === 'true') {
1205
+ const pngBuffer = await tabState.page.screenshot({ type: 'png' });
1206
+ response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
1207
+ }
1208
+
1209
+ return response;
1210
+ })(), HANDLER_TIMEOUT_MS, 'snapshot'));
1211
+
1212
+ log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
675
1213
  res.json(result);
676
1214
  } catch (err) {
677
1215
  log('error', 'snapshot failed', { reqId: req.reqId, tabId: req.params.tabId, error: err.message });
@@ -703,6 +1241,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
703
1241
 
704
1242
  try {
705
1243
  const { userId, ref, selector } = req.body;
1244
+ if (!userId) return res.status(400).json({ error: 'userId required' });
706
1245
  const session = sessions.get(normalizeUserId(userId));
707
1246
  const found = session && findTab(session, tabId);
708
1247
  if (!found) return res.status(404).json({ error: 'Tab not found' });
@@ -714,7 +1253,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
714
1253
  return res.status(400).json({ error: 'ref or selector required' });
715
1254
  }
716
1255
 
717
- const result = await withTabLock(tabId, async () => {
1256
+ const result = await withUserLimit(userId, () => withTimeout(withTabLock(tabId, async () => {
718
1257
  // Full mouse event sequence for stubborn JS click handlers (mirrors Swift WebView.swift)
719
1258
  // Dispatches: mouseover → mouseenter → mousedown → mouseup → click
720
1259
  const dispatchMouseSequence = async (locator) => {
@@ -753,7 +1292,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
753
1292
  log('warn', 'force click failed, trying mouse sequence');
754
1293
  await dispatchMouseSequence(locator);
755
1294
  }
756
- } else if (err.message.includes('not visible') || err.message.includes('timeout')) {
1295
+ } else if (err.message.includes('not visible') || err.message.toLowerCase().includes('timeout')) {
757
1296
  // Fallback 2: Element not responding to click, try mouse sequence
758
1297
  log('warn', 'click timeout, trying mouse sequence');
759
1298
  await dispatchMouseSequence(locator);
@@ -764,7 +1303,13 @@ app.post('/tabs/:tabId/click', async (req, res) => {
764
1303
  };
765
1304
 
766
1305
  if (ref) {
767
- const locator = refToLocator(tabState.page, ref, tabState.refs);
1306
+ let locator = refToLocator(tabState.page, ref, tabState.refs);
1307
+ if (!locator && tabState.refs.size === 0) {
1308
+ // Auto-refresh refs on stale state before failing
1309
+ log('info', 'auto-refreshing stale refs before click', { ref });
1310
+ tabState.refs = await buildRefs(tabState.page);
1311
+ locator = refToLocator(tabState.page, ref, tabState.refs);
1312
+ }
768
1313
  if (!locator) {
769
1314
  const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none';
770
1315
  throw new Error(`Unknown ref: ${ref} (valid refs: e1-${maxRef}, ${tabState.refs.size} total). Refs reset after navigation - call snapshot first.`);
@@ -775,12 +1320,13 @@ app.post('/tabs/:tabId/click', async (req, res) => {
775
1320
  }
776
1321
 
777
1322
  await tabState.page.waitForTimeout(500);
1323
+ tabState.lastSnapshot = null;
778
1324
  tabState.refs = await buildRefs(tabState.page);
779
1325
 
780
1326
  const newUrl = tabState.page.url();
781
1327
  tabState.visitedUrls.add(newUrl);
782
- return { ok: true, url: newUrl };
783
- });
1328
+ return { ok: true, url: newUrl, refsAvailable: tabState.refs.size > 0 };
1329
+ }), HANDLER_TIMEOUT_MS, 'click'));
784
1330
 
785
1331
  log('info', 'clicked', { reqId: req.reqId, tabId, url: result.url });
786
1332
  res.json(result);
@@ -883,11 +1429,11 @@ app.post('/tabs/:tabId/back', async (req, res) => {
883
1429
  const { tabState } = found;
884
1430
  tabState.toolCalls++;
885
1431
 
886
- const result = await withTabLock(tabId, async () => {
1432
+ const result = await withTimeout(withTabLock(tabId, async () => {
887
1433
  await tabState.page.goBack({ timeout: 10000 });
888
1434
  tabState.refs = await buildRefs(tabState.page);
889
1435
  return { ok: true, url: tabState.page.url() };
890
- });
1436
+ }), HANDLER_TIMEOUT_MS, 'back');
891
1437
 
892
1438
  res.json(result);
893
1439
  } catch (err) {
@@ -909,11 +1455,11 @@ app.post('/tabs/:tabId/forward', async (req, res) => {
909
1455
  const { tabState } = found;
910
1456
  tabState.toolCalls++;
911
1457
 
912
- const result = await withTabLock(tabId, async () => {
1458
+ const result = await withTimeout(withTabLock(tabId, async () => {
913
1459
  await tabState.page.goForward({ timeout: 10000 });
914
1460
  tabState.refs = await buildRefs(tabState.page);
915
1461
  return { ok: true, url: tabState.page.url() };
916
- });
1462
+ }), HANDLER_TIMEOUT_MS, 'forward');
917
1463
 
918
1464
  res.json(result);
919
1465
  } catch (err) {
@@ -935,11 +1481,11 @@ app.post('/tabs/:tabId/refresh', async (req, res) => {
935
1481
  const { tabState } = found;
936
1482
  tabState.toolCalls++;
937
1483
 
938
- const result = await withTabLock(tabId, async () => {
1484
+ const result = await withTimeout(withTabLock(tabId, async () => {
939
1485
  await tabState.page.reload({ timeout: 30000 });
940
1486
  tabState.refs = await buildRefs(tabState.page);
941
1487
  return { ok: true, url: tabState.page.url() };
942
- });
1488
+ }), HANDLER_TIMEOUT_MS, 'refresh');
943
1489
 
944
1490
  res.json(result);
945
1491
  } catch (err) {
@@ -1039,7 +1585,7 @@ app.delete('/tabs/:tabId', async (req, res) => {
1039
1585
  const session = sessions.get(normalizeUserId(userId));
1040
1586
  const found = session && findTab(session, req.params.tabId);
1041
1587
  if (found) {
1042
- await found.tabState.page.close();
1588
+ await safePageClose(found.tabState.page);
1043
1589
  found.group.delete(req.params.tabId);
1044
1590
  tabLocks.delete(req.params.tabId);
1045
1591
  if (found.group.size === 0) {
@@ -1062,7 +1608,7 @@ app.delete('/tabs/group/:listItemId', async (req, res) => {
1062
1608
  const group = session?.tabGroups.get(req.params.listItemId);
1063
1609
  if (group) {
1064
1610
  for (const [tabId, tabState] of group) {
1065
- await tabState.page.close().catch(() => {});
1611
+ await safePageClose(tabState.page);
1066
1612
  tabLocks.delete(tabId);
1067
1613
  }
1068
1614
  session.tabGroups.delete(req.params.listItemId);
@@ -1085,6 +1631,7 @@ app.delete('/sessions/:userId', async (req, res) => {
1085
1631
  sessions.delete(userId);
1086
1632
  log('info', 'session closed', { userId });
1087
1633
  }
1634
+ if (sessions.size === 0) scheduleBrowserIdleShutdown();
1088
1635
  res.json({ ok: true });
1089
1636
  } catch (err) {
1090
1637
  log('error', 'session close failed', { error: err.message });
@@ -1102,6 +1649,10 @@ setInterval(() => {
1102
1649
  log('info', 'session expired', { userId });
1103
1650
  }
1104
1651
  }
1652
+ // When all sessions gone, start idle timer to kill browser
1653
+ if (sessions.size === 0) {
1654
+ scheduleBrowserIdleShutdown();
1655
+ }
1105
1656
  }, 60_000);
1106
1657
 
1107
1658
  // =============================================================================
@@ -1109,20 +1660,17 @@ setInterval(() => {
1109
1660
  // These allow camoufox to be used as a profile backend for OpenClaw's browser tool
1110
1661
  // =============================================================================
1111
1662
 
1112
- // GET / - Status (alias for GET /health)
1113
- app.get('/', async (req, res) => {
1114
- try {
1115
- const b = await ensureBrowser();
1116
- res.json({
1117
- ok: true,
1118
- enabled: true,
1119
- running: b.isConnected(),
1120
- engine: 'camoufox',
1121
- browserConnected: b.isConnected()
1122
- });
1123
- } catch (err) {
1124
- res.status(500).json({ ok: false, error: safeError(err) });
1125
- }
1663
// GET / - Passive status endpoint: reports the current browser state and
// never launches a browser itself.
app.get('/', (req, res) => {
  // `browser` is the module-level handle; null means no instance is held.
  let running = false;
  if (browser !== null) {
    // A handle without an isConnected() method is reported as not running.
    running = browser.isConnected?.() ?? false;
  }

  // The same flag is reported under three keys.
  const status = { ok: true, enabled: true, running, engine: 'camoufox' };
  status.browserConnected = running;
  status.browserRunning = running;
  res.json(status);
});
1127
1675
 
1128
1676
  // GET /tabs - List all tabs (OpenClaw expects this)
@@ -1252,12 +1800,12 @@ app.post('/navigate', async (req, res) => {
1252
1800
  const { tabState } = found;
1253
1801
  tabState.toolCalls++;
1254
1802
 
1255
- const result = await withTabLock(targetId, async () => {
1803
+ const result = await withTimeout(withTabLock(targetId, async () => {
1256
1804
  await tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
1257
1805
  tabState.visitedUrls.add(url);
1258
1806
  tabState.refs = await buildRefs(tabState.page);
1259
1807
  return { ok: true, targetId, url: tabState.page.url() };
1260
- });
1808
+ }), HANDLER_TIMEOUT_MS, 'openclaw-navigate');
1261
1809
 
1262
1810
  res.json(result);
1263
1811
  } catch (err) {
@@ -1270,6 +1818,7 @@ app.post('/navigate', async (req, res) => {
1270
1818
  app.get('/snapshot', async (req, res) => {
1271
1819
  try {
1272
1820
  const { targetId, userId, format = 'text' } = req.query;
1821
+ const offset = parseInt(req.query.offset) || 0;
1273
1822
  if (!userId) {
1274
1823
  return res.status(400).json({ error: 'userId is required' });
1275
1824
  }
@@ -1282,6 +1831,18 @@ app.get('/snapshot', async (req, res) => {
1282
1831
 
1283
1832
  const { tabState } = found;
1284
1833
  tabState.toolCalls++;
1834
+
1835
+ // Cached chunk retrieval
1836
+ if (offset > 0 && tabState.lastSnapshot) {
1837
+ const win = windowSnapshot(tabState.lastSnapshot, offset);
1838
+ const response = { ok: true, format: 'aria', targetId, url: tabState.page.url(), snapshot: win.text, refsCount: tabState.refs.size, truncated: win.truncated, totalChars: win.totalChars, hasMore: win.hasMore, nextOffset: win.nextOffset };
1839
+ if (req.query.includeScreenshot === 'true') {
1840
+ const pngBuffer = await tabState.page.screenshot({ type: 'png' });
1841
+ response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
1842
+ }
1843
+ return res.json(response);
1844
+ }
1845
+
1285
1846
  tabState.refs = await buildRefs(tabState.page);
1286
1847
 
1287
1848
  const ariaYaml = await getAriaSnapshot(tabState.page);
@@ -1310,14 +1871,28 @@ app.get('/snapshot', async (req, res) => {
1310
1871
  }).join('\n');
1311
1872
  }
1312
1873
 
1313
- res.json({
1874
+ tabState.lastSnapshot = annotatedYaml;
1875
+ const win = windowSnapshot(annotatedYaml, 0);
1876
+
1877
+ const response = {
1314
1878
  ok: true,
1315
1879
  format: 'aria',
1316
1880
  targetId,
1317
1881
  url: tabState.page.url(),
1318
- snapshot: annotatedYaml,
1319
- refsCount: tabState.refs.size
1320
- });
1882
+ snapshot: win.text,
1883
+ refsCount: tabState.refs.size,
1884
+ truncated: win.truncated,
1885
+ totalChars: win.totalChars,
1886
+ hasMore: win.hasMore,
1887
+ nextOffset: win.nextOffset,
1888
+ };
1889
+
1890
+ if (req.query.includeScreenshot === 'true') {
1891
+ const pngBuffer = await tabState.page.screenshot({ type: 'png' });
1892
+ response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
1893
+ }
1894
+
1895
+ res.json(response);
1321
1896
  } catch (err) {
1322
1897
  log('error', 'openclaw snapshot failed', { reqId: req.reqId, error: err.message });
1323
1898
  res.status(500).json({ error: safeError(err) });
@@ -1346,7 +1921,7 @@ app.post('/act', async (req, res) => {
1346
1921
  const { tabState } = found;
1347
1922
  tabState.toolCalls++;
1348
1923
 
1349
- const result = await withTabLock(targetId, async () => {
1924
+ const result = await withTimeout(withTabLock(targetId, async () => {
1350
1925
  switch (kind) {
1351
1926
  case 'click': {
1352
1927
  const { ref, selector, doubleClick } = params;
@@ -1453,7 +2028,7 @@ app.post('/act', async (req, res) => {
1453
2028
  }
1454
2029
 
1455
2030
  case 'close': {
1456
- await tabState.page.close();
2031
+ await safePageClose(tabState.page);
1457
2032
  found.group.delete(targetId);
1458
2033
  tabLocks.delete(targetId);
1459
2034
  return { ok: true, targetId };
@@ -1462,7 +2037,7 @@ app.post('/act', async (req, res) => {
1462
2037
  default:
1463
2038
  throw new Error(`Unsupported action kind: ${kind}`);
1464
2039
  }
1465
- });
2040
+ }), HANDLER_TIMEOUT_MS, 'act');
1466
2041
 
1467
2042
  res.json(result);
1468
2043
  } catch (err) {
@@ -1490,6 +2065,32 @@ setInterval(() => {
1490
2065
  });
1491
2066
  }, 5 * 60_000);
1492
2067
 
2068
// Active health probe — detect a hung browser even when isConnected() lies.
//
// Runs every 60s. It is skipped when there is no browser, a recovery is in
// progress, operations are in flight, or healthState.lastSuccessfulNav is
// less than 120s old. Otherwise it opens a throwaway context and page, loads
// about:blank and closes both again. Any failure triggers restartBrowser().
//
// The whole probe runs under a single deadline. Only page.goto() carries its
// own timeout, so without the deadline a browser that stops answering
// newContext()/newPage()/close() would leave the probe pending forever
// instead of failing, and a new probe would pile up behind it every minute.
setInterval(async () => {
  // Upper bound for the complete probe; well below the 60s interval.
  const PROBE_DEADLINE_MS = 15000;

  if (!browser || healthState.isRecovering) return;
  // Skip probe if operations are in flight
  if (healthState.activeOps > 0) {
    log('info', 'health probe skipped, operations active', { activeOps: healthState.activeOps });
    return;
  }
  const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
  if (timeSinceSuccess < 120000) return;

  let testContext;
  let deadlineTimer;

  // Starts immediately; Promise.race below subscribes to it, so a rejection
  // that arrives after the deadline is still handled.
  const probe = (async () => {
    testContext = await browser.newContext();
    const page = await testContext.newPage();
    await page.goto('about:blank', { timeout: 5000 });
    await page.close();
    await testContext.close();
  })();
  const deadline = new Promise((_, reject) => {
    deadlineTimer = setTimeout(() => reject(new Error('health probe timed out')), PROBE_DEADLINE_MS);
  });

  try {
    await Promise.race([probe, deadline]);
    healthState.lastSuccessfulNav = Date.now();
  } catch (err) {
    log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
    // Best-effort cleanup. Deliberately not awaited: an unresponsive browser
    // must not be able to delay the restart.
    if (testContext) testContext.close().catch(() => {});
    // Fire-and-forget like before, but a failed restart is now logged
    // instead of being swallowed silently.
    restartBrowser('health probe failed').catch((restartErr) => {
      log('error', 'browser restart after failed health probe failed', { error: restartErr?.message });
    });
  } finally {
    clearTimeout(deadlineTimer);
  }
}, 60_000);
2093
+
1493
2094
  // Crash logging
1494
2095
  process.on('uncaughtException', (err) => {
1495
2096
  log('error', 'uncaughtException', { error: err.message, stack: err.stack });
@@ -1528,9 +2129,7 @@ process.on('SIGINT', () => gracefulShutdown('SIGINT'));
1528
2129
  const PORT = CONFIG.port;
1529
2130
  const server = app.listen(PORT, () => {
1530
2131
  log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
1531
- ensureBrowser().catch(err => {
1532
- log('error', 'browser pre-launch failed', { error: err.message });
1533
- });
2132
+ // Browser launches lazily on first request (saves ~550MB when idle)
1534
2133
  });
1535
2134
 
1536
2135
  server.on('error', (err) => {