@askjo/camofox-browser 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +17 -13
- package/README.md +54 -6
- package/lib/config.js +52 -1
- package/lib/fly.js +54 -0
- package/lib/metrics.js +168 -0
- package/lib/proxy.js +277 -0
- package/lib/youtube.js +19 -4
- package/openclaw.plugin.json +1 -1
- package/package.json +2 -1
- package/plugin.ts +2 -1
- package/server.js +728 -143
package/server.js
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import { Camoufox, launchOptions } from 'camoufox-js';
|
|
2
|
+
import { VirtualDisplay } from 'camoufox-js/dist/virtdisplay.js';
|
|
2
3
|
import { firefox } from 'playwright-core';
|
|
3
4
|
import express from 'express';
|
|
4
5
|
import crypto from 'crypto';
|
|
5
6
|
import os from 'os';
|
|
6
7
|
import { expandMacro } from './lib/macros.js';
|
|
7
8
|
import { loadConfig } from './lib/config.js';
|
|
9
|
+
import { normalizePlaywrightProxy, createProxyPool, buildProxyUrl } from './lib/proxy.js';
|
|
10
|
+
import { createFlyHelpers } from './lib/fly.js';
|
|
8
11
|
import { windowSnapshot } from './lib/snapshot.js';
|
|
9
12
|
import {
|
|
10
13
|
MAX_DOWNLOAD_INLINE_BYTES,
|
|
@@ -14,7 +17,15 @@ import {
|
|
|
14
17
|
getDownloadsList,
|
|
15
18
|
extractPageImages,
|
|
16
19
|
} from './lib/downloads.js';
|
|
17
|
-
import { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
|
|
20
|
+
import { detectYtDlp, hasYtDlp, ensureYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
|
|
21
|
+
import {
|
|
22
|
+
register as metricsRegister,
|
|
23
|
+
requestsTotal, requestDuration, pageLoadDuration,
|
|
24
|
+
activeTabsGauge, tabLockQueueDepth,
|
|
25
|
+
tabLockTimeoutsTotal, startMemoryReporter, stopMemoryReporter, actionFromReq,
|
|
26
|
+
failuresTotal, browserRestartsTotal, tabsDestroyedTotal,
|
|
27
|
+
sessionsExpiredTotal, tabsReapedTotal, tabsRecycledTotal, classifyError,
|
|
28
|
+
} from './lib/metrics.js';
|
|
18
29
|
|
|
19
30
|
const CONFIG = loadConfig();
|
|
20
31
|
|
|
@@ -37,23 +48,44 @@ function log(level, msg, fields = {}) {
|
|
|
37
48
|
const app = express();
|
|
38
49
|
app.use(express.json({ limit: '100kb' }));
|
|
39
50
|
|
|
40
|
-
// Request logging middleware
|
|
51
|
+
// Request logging + metrics middleware
|
|
41
52
|
app.use((req, res, next) => {
  // Tag the request with a short correlation id and a start timestamp.
  const reqId = crypto.randomUUID().slice(0, 8);
  Object.assign(req, { reqId, startTime: Date.now() });

  const userId = req.body?.userId || req.query?.userId || '-';
  // /health is still measured, it is just kept out of the request log.
  const isLoggable = () => req.path !== '/health';

  if (isLoggable()) {
    log('info', 'req', { reqId, method: req.method, path: req.path, userId });
  }

  const action = actionFromReq(req);
  const stopTimer = requestDuration.startTimer({ action });

  // Wrap res.end so the outcome is recorded at the moment the response is finished.
  const originalEnd = res.end.bind(res);
  res.end = function (...endArgs) {
    const ms = Date.now() - req.startTime;
    const outcome = res.statusCode >= 400 ? 'error' : 'success';
    requestsTotal.labels(action, outcome).inc();
    stopTimer();

    if (isLoggable()) {
      log('info', 'res', { reqId, status: res.statusCode, ms });
    }

    return originalEnd(...endArgs);
  };

  next();
});
|
|
56
81
|
|
|
82
|
+
// --- Horizontal scaling (Fly.io multi-machine) ---
|
|
83
|
+
const fly = createFlyHelpers(CONFIG);
|
|
84
|
+
const FLY_MACHINE_ID = fly.machineId;
|
|
85
|
+
|
|
86
|
+
// Route tab requests to the owning machine via fly-replay header.
|
|
87
|
+
app.use('/tabs/:tabId', fly.replayMiddleware(log));
|
|
88
|
+
|
|
57
89
|
const ALLOWED_URL_SCHEMES = ['http:', 'https:'];
|
|
58
90
|
|
|
59
91
|
// Interactive roles to include - exclude combobox to avoid opening complex widgets
|
|
@@ -203,6 +235,7 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
|
|
|
203
235
|
log('info', 'cookies imported', { reqId: req.reqId, userId: String(userId), count: sanitized.length });
|
|
204
236
|
res.json(result);
|
|
205
237
|
} catch (err) {
|
|
238
|
+
failuresTotal.labels(classifyError(err), 'set_cookies').inc();
|
|
206
239
|
log('error', 'cookie import failed', { reqId: req.reqId, error: err.message });
|
|
207
240
|
res.status(500).json({ error: safeError(err) });
|
|
208
241
|
}
|
|
@@ -229,6 +262,8 @@ const FAILURE_THRESHOLD = 3;
|
|
|
229
262
|
const MAX_CONSECUTIVE_TIMEOUTS = 3;
|
|
230
263
|
const TAB_LOCK_TIMEOUT_MS = 35000; // Must be > HANDLER_TIMEOUT_MS so active op times out first
|
|
231
264
|
|
|
265
|
+
|
|
266
|
+
|
|
232
267
|
// Proper mutex for tab serialization. The old Promise-chain lock on timeout proceeded
|
|
233
268
|
// WITHOUT the lock, allowing concurrent Playwright operations that corrupt CDP state.
|
|
234
269
|
class TabLock {
|
|
@@ -243,9 +278,12 @@ class TabLock {
|
|
|
243
278
|
entry.timer = setTimeout(() => {
|
|
244
279
|
const idx = this.queue.indexOf(entry);
|
|
245
280
|
if (idx !== -1) this.queue.splice(idx, 1);
|
|
281
|
+
tabLockTimeoutsTotal.inc();
|
|
282
|
+
refreshTabLockQueueDepth();
|
|
246
283
|
reject(new Error('Tab lock queue timeout'));
|
|
247
284
|
}, timeoutMs);
|
|
248
285
|
this.queue.push(entry);
|
|
286
|
+
refreshTabLockQueueDepth();
|
|
249
287
|
this._tryNext();
|
|
250
288
|
});
|
|
251
289
|
}
|
|
@@ -253,6 +291,7 @@ class TabLock {
|
|
|
253
291
|
release() {
|
|
254
292
|
this.active = false;
|
|
255
293
|
this._tryNext();
|
|
294
|
+
refreshTabLockQueueDepth();
|
|
256
295
|
}
|
|
257
296
|
|
|
258
297
|
_tryNext() {
|
|
@@ -260,6 +299,7 @@ class TabLock {
|
|
|
260
299
|
this.active = true;
|
|
261
300
|
const entry = this.queue.shift();
|
|
262
301
|
clearTimeout(entry.timer);
|
|
302
|
+
refreshTabLockQueueDepth();
|
|
263
303
|
entry.resolve();
|
|
264
304
|
}
|
|
265
305
|
|
|
@@ -270,6 +310,7 @@ class TabLock {
|
|
|
270
310
|
entry.reject(new Error('Tab destroyed'));
|
|
271
311
|
}
|
|
272
312
|
this.queue = [];
|
|
313
|
+
refreshTabLockQueueDepth();
|
|
273
314
|
}
|
|
274
315
|
}
|
|
275
316
|
|
|
@@ -302,6 +343,10 @@ function withTimeout(promise, ms, label) {
|
|
|
302
343
|
]);
|
|
303
344
|
}
|
|
304
345
|
|
|
346
|
+
/**
 * Effective per-request timeout in milliseconds.
 * When the proxy pool can rotate sessions the timeout is raised to at least
 * 180000 ms; otherwise the base value is returned unchanged.
 */
function requestTimeoutMs(baseMs = HANDLER_TIMEOUT_MS) {
  if (!proxyPool?.canRotateSessions) {
    return baseMs;
  }
  return Math.max(baseMs, 180000);
}
|
|
349
|
+
|
|
305
350
|
const userConcurrency = new Map();
|
|
306
351
|
|
|
307
352
|
async function withUserLimit(userId, operation) {
|
|
@@ -355,25 +400,27 @@ function getHostOS() {
|
|
|
355
400
|
return 'linux';
|
|
356
401
|
}
|
|
357
402
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
403
|
+
// Outbound proxy strategy: build the pool once at startup and log what was configured.
const proxyPool = createProxyPool(CONFIG.proxy);

if (!proxyPool) {
  log('info', 'no proxy configured');
} else {
  // Rotating pools are described by the backconnect endpoint, static pools by host/ports.
  log('info', 'proxy pool created', {
    mode: proxyPool.mode,
    host: proxyPool.canRotateSessions ? CONFIG.proxy.backconnectHost : CONFIG.proxy.host,
    ports: proxyPool.canRotateSessions ? [CONFIG.proxy.backconnectPort] : CONFIG.proxy.ports,
    poolSize: proxyPool.size,
    country: CONFIG.proxy.country || null,
    state: CONFIG.proxy.state || null,
    city: CONFIG.proxy.city || null,
  });
}
|
|
373
419
|
|
|
374
420
|
const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
|
|
375
421
|
let browserIdleTimer = null;
|
|
376
422
|
let browserLaunchPromise = null;
|
|
423
|
+
let browserWarmRetryTimer = null;
|
|
377
424
|
|
|
378
425
|
function scheduleBrowserIdleShutdown() {
|
|
379
426
|
clearBrowserIdleTimer();
|
|
@@ -396,6 +443,21 @@ function clearBrowserIdleTimer() {
|
|
|
396
443
|
}
|
|
397
444
|
}
|
|
398
445
|
|
|
446
|
+
/**
 * Schedule a background attempt to (re)launch the browser after `delayMs`.
 *
 * No-op when a retry is already pending, a browser already exists, or a launch
 * is in flight. On failure the next retry is scheduled with the delay doubled,
 * capped at 30000 ms.
 *
 * @param {number} [delayMs=5000] Delay before this attempt, in milliseconds.
 */
function scheduleBrowserWarmRetry(delayMs = 5000) {
  if (browserWarmRetryTimer || browser || browserLaunchPromise) return;
  browserWarmRetryTimer = setTimeout(async () => {
    // Clear the handle first so a failed attempt can schedule its successor.
    browserWarmRetryTimer = null;
    try {
      const start = Date.now();
      await ensureBrowser();
      log('info', 'background browser warm retry succeeded', { ms: Date.now() - start });
    } catch (err) {
      // Log the delay that will actually be used next, not the one that just elapsed.
      const nextDelayMs = Math.min(delayMs * 2, 30000);
      log('warn', 'background browser warm retry failed', { error: err.message, nextDelayMs });
      scheduleBrowserWarmRetry(nextDelayMs);
    }
  }, delayMs);
}
|
|
460
|
+
|
|
399
461
|
// --- Browser health tracking ---
|
|
400
462
|
const healthState = {
|
|
401
463
|
consecutiveNavFailures: 0,
|
|
@@ -417,6 +479,7 @@ function recordNavFailure() {
|
|
|
417
479
|
async function restartBrowser(reason) {
|
|
418
480
|
if (healthState.isRecovering) return;
|
|
419
481
|
healthState.isRecovering = true;
|
|
482
|
+
browserRestartsTotal.labels(reason).inc();
|
|
420
483
|
log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });
|
|
421
484
|
try {
|
|
422
485
|
for (const [, session] of sessions) {
|
|
@@ -449,29 +512,157 @@ function getTotalTabCount() {
|
|
|
449
512
|
return total;
|
|
450
513
|
}
|
|
451
514
|
|
|
515
|
+
// Virtual display for WebGL support and anti-detection.
|
|
516
|
+
// Xvfb gives Firefox a real X display with GLX, enabling software-rendered WebGL
|
|
517
|
+
// via Mesa llvmpipe. Without this, WebGL returns "no context" — a massive bot signal.
|
|
518
|
+
let virtualDisplay = null;
|
|
519
|
+
let browserLaunchProxy = null;
|
|
520
|
+
|
|
521
|
+
/**
 * Open a throwaway context on `candidateBrowser`, visit the Google home page and
 * then a search results URL, and report whether the search looks usable.
 * Resolves to { ok, url, blocked }; the probe context is always closed.
 */
async function probeGoogleSearch(candidateBrowser) {
  let context = null;
  try {
    context = await candidateBrowser.newContext({
      viewport: { width: 1280, height: 720 },
      permissions: ['geolocation'],
    });
    const page = await context.newPage();

    // Land on the home page first, pause briefly, then run the search.
    await page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForTimeout(1200);
    await page.goto('https://www.google.com/search?q=weather%20today', { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForTimeout(4000);

    const blocked = await isGoogleSearchBlocked(page);
    const ok = !blocked && isGoogleSerp(page.url());
    return { ok, url: page.url(), blocked };
  } finally {
    await context?.close().catch(() => {});
  }
}
|
|
544
|
+
|
|
545
|
+
/**
 * Wrap `candidateBrowser.close` so closing the browser also clears the recorded
 * launch proxy and kills the virtual display that was started for it.
 *
 * Cleanup runs in `finally`: if the underlying close rejects, the display is
 * still killed and the module-level state is still reset, and the original
 * error still propagates to the caller.
 *
 * @param {object} candidateBrowser Browser whose `close` method is wrapped in place.
 * @param {object|null} localVirtualDisplay Display owned by this browser, if any.
 */
function attachBrowserCleanup(candidateBrowser, localVirtualDisplay) {
  const origClose = candidateBrowser.close.bind(candidateBrowser);
  candidateBrowser.close = async (...args) => {
    try {
      await origClose(...args);
    } finally {
      browserLaunchProxy = null;
      if (localVirtualDisplay) {
        localVirtualDisplay.kill();
        // Only clear the global if it still points at this browser's display.
        if (virtualDisplay === localVirtualDisplay) virtualDisplay = null;
      }
    }
  };
}
|
|
556
|
+
|
|
452
557
|
/**
 * Launch a Camoufox (Firefox) instance and publish it as the module-level `browser`.
 *
 * Each attempt picks a launch proxy from the pool (if any), tries to start an
 * Xvfb virtual display on Linux (falling back to headless), launches the
 * browser, and, when the pool can rotate sessions, probes Google search.
 * A failed probe discards the browser and retries, except on the last attempt,
 * where the browser is accepted in degraded mode.
 *
 * @returns {Promise<object>} The launched browser.
 * @throws The last launch error when every attempt failed.
 */
async function launchBrowserInstance() {
  const hostOS = getHostOS();
  // Always make at least one attempt, even if the pool reports 0 or a non-number.
  const configuredAttempts = Number(proxyPool?.launchRetries ?? 1);
  const maxAttempts = Number.isFinite(configuredAttempts)
    ? Math.max(1, Math.floor(configuredAttempts))
    : 1;
  let lastError = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const launchProxy = proxyPool
      ? proxyPool.getLaunchProxy(proxyPool.canRotateSessions ? `browser-${crypto.randomUUID().replace(/-/g, '').slice(0, 12)}` : undefined)
      : null;

    let localVirtualDisplay = null;
    let vdDisplay = undefined;
    let candidateBrowser = null;

    try {
      if (os.platform() === 'linux') {
        localVirtualDisplay = new VirtualDisplay();
        vdDisplay = localVirtualDisplay.get();
        log('info', 'xvfb virtual display started', { display: vdDisplay, attempt });
      }
    } catch (err) {
      log('warn', 'xvfb not available, falling back to headless', { error: err.message, attempt });
      // Best-effort teardown of a display object that was created but could not be used.
      try {
        localVirtualDisplay?.kill();
      } catch {
        // Nothing to clean up if the display never started.
      }
      localVirtualDisplay = null;
      vdDisplay = undefined;
    }

    const useVirtualDisplay = !!vdDisplay;
    log('info', 'launching camoufox', {
      hostOS,
      attempt,
      maxAttempts,
      geoip: !!launchProxy,
      proxyMode: proxyPool?.mode || null,
      proxyServer: launchProxy?.server || null,
      proxySession: launchProxy?.sessionId || null,
      proxyPoolSize: proxyPool?.size || 0,
      virtualDisplay: useVirtualDisplay,
    });

    try {
      const options = await launchOptions({
        headless: !useVirtualDisplay,
        os: hostOS,
        humanize: true,
        enable_cache: true,
        proxy: launchProxy,
        geoip: !!launchProxy,
        virtual_display: vdDisplay,
      });
      options.proxy = normalizePlaywrightProxy(options.proxy);

      candidateBrowser = await firefox.launch(options);

      if (proxyPool?.canRotateSessions) {
        const probe = await probeGoogleSearch(candidateBrowser);
        if (!probe.ok) {
          log('warn', 'browser launch google probe failed', {
            attempt,
            maxAttempts,
            proxySession: launchProxy?.sessionId || null,
            url: probe.url,
          });
          if (attempt < maxAttempts) {
            await candidateBrowser.close().catch(() => {});
            if (localVirtualDisplay) localVirtualDisplay.kill();
            continue;
          }
          // Last attempt: accept browser in degraded mode rather than death-spiraling.
          // Non-Google sites will still work; Google requests will get blocked responses.
          log('error', 'all proxy sessions Google-blocked, accepting browser in degraded mode', {
            maxAttempts,
            proxySession: launchProxy?.sessionId || null,
          });
        }
      }

      virtualDisplay = localVirtualDisplay;
      browserLaunchProxy = launchProxy;
      browser = candidateBrowser;
      attachBrowserCleanup(browser, localVirtualDisplay);

      log('info', 'camoufox launched', {
        attempt,
        maxAttempts,
        virtualDisplay: useVirtualDisplay,
        proxyMode: proxyPool?.mode || null,
        proxyServer: launchProxy?.server || null,
        proxySession: launchProxy?.sessionId || null,
      });
      return browser;
    } catch (err) {
      lastError = err;
      log('warn', 'camoufox launch attempt failed', {
        attempt,
        maxAttempts,
        error: err.message,
        proxySession: launchProxy?.sessionId || null,
      });
      await candidateBrowser?.close().catch(() => {});
      if (localVirtualDisplay) localVirtualDisplay.kill();
    }
  }

  throw lastError || new Error('Failed to launch a usable browser');
}
|
|
471
661
|
|
|
472
662
|
async function ensureBrowser() {
|
|
473
663
|
clearBrowserIdleTimer();
|
|
474
664
|
if (browser && !browser.isConnected()) {
|
|
665
|
+
failuresTotal.labels('browser_disconnected', 'internal').inc();
|
|
475
666
|
log('warn', 'browser disconnected, clearing dead sessions and relaunching', {
|
|
476
667
|
deadSessions: sessions.size,
|
|
477
668
|
});
|
|
@@ -479,13 +670,20 @@ async function ensureBrowser() {
|
|
|
479
670
|
await session.context.close().catch(() => {});
|
|
480
671
|
}
|
|
481
672
|
sessions.clear();
|
|
673
|
+
// Clean up virtual display from dead browser before relaunching
|
|
674
|
+
if (virtualDisplay) {
|
|
675
|
+
virtualDisplay.kill();
|
|
676
|
+
virtualDisplay = null;
|
|
677
|
+
}
|
|
678
|
+
browserLaunchProxy = null;
|
|
482
679
|
browser = null;
|
|
483
680
|
}
|
|
484
681
|
if (browser) return browser;
|
|
485
682
|
if (browserLaunchPromise) return browserLaunchPromise;
|
|
683
|
+
const launchTimeoutMs = proxyPool?.launchTimeoutMs ?? 60000;
|
|
486
684
|
browserLaunchPromise = Promise.race([
|
|
487
685
|
launchBrowserInstance(),
|
|
488
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error(
|
|
686
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error(`Browser launch timeout (${Math.round(launchTimeoutMs / 1000)}s)`)), launchTimeoutMs)),
|
|
489
687
|
]).finally(() => { browserLaunchPromise = null; });
|
|
490
688
|
return browserLaunchPromise;
|
|
491
689
|
}
|
|
@@ -528,11 +726,26 @@ async function getSession(userId) {
|
|
|
528
726
|
contextOptions.timezoneId = 'America/Los_Angeles';
|
|
529
727
|
contextOptions.geolocation = { latitude: 37.7749, longitude: -122.4194 };
|
|
530
728
|
}
|
|
729
|
+
let sessionProxy = null;
|
|
730
|
+
if (proxyPool?.canRotateSessions) {
|
|
731
|
+
sessionProxy = proxyPool.getNext(`ctx-${key}-${crypto.randomUUID().replace(/-/g, '').slice(0, 8)}`);
|
|
732
|
+
contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
|
|
733
|
+
log('info', 'session proxy assigned', { userId: key, sessionId: sessionProxy.sessionId });
|
|
734
|
+
} else if (proxyPool) {
|
|
735
|
+
sessionProxy = proxyPool.getNext();
|
|
736
|
+
contextOptions.proxy = normalizePlaywrightProxy(sessionProxy);
|
|
737
|
+
log('info', 'session proxy assigned', { userId: key, proxy: sessionProxy.server });
|
|
738
|
+
}
|
|
531
739
|
const context = await b.newContext(contextOptions);
|
|
532
740
|
|
|
533
|
-
session = { context, tabGroups: new Map(), lastAccess: Date.now() };
|
|
741
|
+
session = { context, tabGroups: new Map(), lastAccess: Date.now(), proxySessionId: sessionProxy?.sessionId || null };
|
|
534
742
|
sessions.set(key, session);
|
|
535
|
-
log('info', 'session created', {
|
|
743
|
+
log('info', 'session created', {
|
|
744
|
+
userId: key,
|
|
745
|
+
proxyMode: proxyPool?.mode || null,
|
|
746
|
+
proxyServer: sessionProxy?.server || browserLaunchProxy?.server || null,
|
|
747
|
+
proxySession: sessionProxy?.sessionId || browserLaunchProxy?.sessionId || null,
|
|
748
|
+
});
|
|
536
749
|
}
|
|
537
750
|
session.lastAccess = Date.now();
|
|
538
751
|
return session;
|
|
@@ -571,11 +784,30 @@ function isTabDestroyedError(err) {
|
|
|
571
784
|
|
|
572
785
|
// Centralized error handler for route catch blocks.
|
|
573
786
|
// Auto-destroys dead browser sessions and returns appropriate status codes.
|
|
787
|
+
/**
 * Report whether an error looks like a proxy-level connection failure.
 *
 * Accepts Error-like objects and thrown strings. A missing or non-string
 * `message` is coerced instead of raising inside the error handler.
 *
 * @param {unknown} err Value caught from a failed operation.
 * @returns {boolean} True when the message mentions NS_ERROR_PROXY or a
 *   "proxy connection" failure (any letter case).
 */
function isProxyError(err) {
  if (!err) return false;
  const msg = typeof err === 'string' ? err : String(err.message ?? '');
  return msg.includes('NS_ERROR_PROXY') || /proxy connection/i.test(msg);
}
|
|
792
|
+
|
|
574
793
|
function handleRouteError(err, req, res, extraFields = {}) {
|
|
794
|
+
const failureType = classifyError(err);
|
|
795
|
+
const action = actionFromReq(req);
|
|
796
|
+
failuresTotal.labels(failureType, action).inc();
|
|
797
|
+
|
|
575
798
|
const userId = req.body?.userId || req.query?.userId;
|
|
576
799
|
if (userId && isDeadContextError(err)) {
|
|
577
800
|
destroySession(userId);
|
|
578
801
|
}
|
|
802
|
+
// Proxy errors mean the session is dead — rotate at context level.
|
|
803
|
+
// Destroy the user's session so the next request gets a fresh context with a new proxy.
|
|
804
|
+
if (isProxyError(err) && proxyPool?.canRotateSessions && userId) {
|
|
805
|
+
log('warn', 'proxy error detected, destroying user session for fresh proxy on next request', {
|
|
806
|
+
action, userId, error: err.message,
|
|
807
|
+
});
|
|
808
|
+
browserRestartsTotal.labels('proxy_error').inc();
|
|
809
|
+
destroySession(userId);
|
|
810
|
+
}
|
|
579
811
|
// Track consecutive timeouts per tab and auto-destroy stuck tabs
|
|
580
812
|
if (userId && isTimeoutError(err)) {
|
|
581
813
|
const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
|
|
@@ -586,7 +818,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
|
|
|
586
818
|
found.tabState.consecutiveTimeouts++;
|
|
587
819
|
if (found.tabState.consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
|
|
588
820
|
log('warn', 'auto-destroying tab after consecutive timeouts', { tabId, count: found.tabState.consecutiveTimeouts });
|
|
589
|
-
destroyTab(session, tabId);
|
|
821
|
+
destroyTab(session, tabId, 'consecutive_timeouts');
|
|
590
822
|
}
|
|
591
823
|
}
|
|
592
824
|
}
|
|
@@ -596,7 +828,7 @@ function handleRouteError(err, req, res, extraFields = {}) {
|
|
|
596
828
|
const tabId = req.body?.tabId || req.query?.tabId || req.params?.tabId;
|
|
597
829
|
const session = sessions.get(normalizeUserId(userId));
|
|
598
830
|
if (session && tabId) {
|
|
599
|
-
destroyTab(session, tabId);
|
|
831
|
+
destroyTab(session, tabId, 'lock_queue');
|
|
600
832
|
}
|
|
601
833
|
return res.status(503).json({ error: 'Tab unresponsive and has been destroyed. Open a new tab.', ...extraFields });
|
|
602
834
|
}
|
|
@@ -607,25 +839,61 @@ function handleRouteError(err, req, res, extraFields = {}) {
|
|
|
607
839
|
sendError(res, err, extraFields);
|
|
608
840
|
}
|
|
609
841
|
|
|
610
|
-
/**
 * Tear down a single tab: drain and drop its lock, close its page, remove it
 * from whichever tab group holds it, and update the tab metrics.
 * Returns true when the tab was found in the session, false otherwise.
 */
function destroyTab(session, tabId, reason) {
  const pendingLock = tabLocks.get(tabId);
  if (pendingLock) {
    pendingLock.drain();
    tabLocks.delete(tabId);
    refreshTabLockQueueDepth();
  }

  for (const [listItemId, group] of session.tabGroups) {
    if (!group.has(tabId)) continue;

    const tabState = group.get(tabId);
    log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls: tabState.toolCalls, reason: reason || 'unknown' });
    safePageClose(tabState.page);
    group.delete(tabId);
    // Drop the group entry once its last tab is gone.
    if (group.size === 0) {
      session.tabGroups.delete(listItemId);
    }
    refreshActiveTabsGauge();
    // Only labelled destructions are counted.
    if (reason) {
      tabsDestroyedTotal.labels(reason).inc();
    }
    return true;
  }

  return false;
}
|
|
628
863
|
|
|
864
|
+
/**
 * Recycle the least-used tab in a session to free a slot.
 *
 * "Least used" means the lowest `toolCalls` count across every tab group;
 * ties keep the first tab encountered. The tab's page is closed, the tab is
 * removed from its group (and the group dropped if it becomes empty), its lock
 * is drained, and the tab metrics are refreshed.
 *
 * @param {object} session Session whose `tabGroups` map is searched.
 * @param {string} reqId Request id, used only for logging.
 * @returns {Promise<{recycledTabId: string, recycledFromGroup: string}|null>}
 *   The recycled tab's identifiers, or null if the session has no tabs.
 */
async function recycleOldestTab(session, reqId) {
  let oldestTab = null;
  let oldestGroup = null;
  let oldestGroupKey = null;
  let oldestTabId = null;
  for (const [gKey, group] of session.tabGroups) {
    for (const [tid, ts] of group) {
      if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
        oldestTab = ts;
        oldestGroup = group;
        oldestGroupKey = gKey;
        oldestTabId = tid;
      }
    }
  }
  if (!oldestTab) return null;

  await safePageClose(oldestTab.page);
  oldestGroup.delete(oldestTabId);
  if (oldestGroup.size === 0) session.tabGroups.delete(oldestGroupKey);
  // Keep the active-tabs gauge in step with the removal, as the other tab teardown paths do.
  refreshActiveTabsGauge();
  const lock = tabLocks.get(oldestTabId);
  if (lock) {
    lock.drain();
    tabLocks.delete(oldestTabId);
  }
  refreshTabLockQueueDepth();
  tabsRecycledTotal.inc();
  log('info', 'tab recycled (limit reached)', { reqId, recycledTabId: oldestTabId, recycledFromGroup: oldestGroupKey });
  return { recycledTabId: oldestTabId, recycledFromGroup: oldestGroupKey };
}
|
|
896
|
+
|
|
629
897
|
function destroySession(userId) {
|
|
630
898
|
const key = normalizeUserId(userId);
|
|
631
899
|
const session = sessions.get(key);
|
|
@@ -654,13 +922,89 @@ function createTabState(page) {
|
|
|
654
922
|
toolCalls: 0,
|
|
655
923
|
consecutiveTimeouts: 0,
|
|
656
924
|
lastSnapshot: null,
|
|
925
|
+
lastRequestedUrl: null,
|
|
926
|
+
googleRetryCount: 0,
|
|
657
927
|
};
|
|
658
928
|
}
|
|
659
929
|
|
|
930
|
+
/**
 * Check whether the page is showing a connection-failure screen.
 * Reads the first 600 characters of the body text and matches it against the
 * known failure phrases. A missing or closed page, or a failed read, yields false.
 */
async function isGoogleUnavailable(page) {
  const usable = Boolean(page) && !page.isClosed();
  if (!usable) {
    return false;
  }

  const failurePhrases = /Unable to connect|502 Bad Gateway or Proxy Error|Camoufox can’t establish a connection/;
  const excerpt = await page
    .evaluate(() => document.body?.innerText?.slice(0, 600) || '')
    .catch(() => '');
  return failurePhrases.test(excerpt);
}
|
|
935
|
+
|
|
936
|
+
/**
 * Replay a Google search on a brand-new context for the user.
 *
 * Returns null when the previous tab's last requested URL is not a Google
 * search, or when it has already been retried 3 times. Otherwise the user's
 * current session context is closed and removed, a new session and page are
 * created, the tab is re-registered under the same tabId, and the search is
 * replayed (home page first, then the original URL).
 * Resolves to { session, tabState } for the new tab.
 */
async function rotateGoogleTab(userId, sessionKey, tabId, previousTabState, reason, reqId) {
  const lastUrl = previousTabState?.lastRequestedUrl;
  if (!lastUrl || !isGoogleSearchUrl(previousTabState.lastRequestedUrl)) {
    return null;
  }
  if ((previousTabState.googleRetryCount || 0) >= 3) {
    return null;
  }

  browserRestartsTotal.labels(reason).inc(); // track rotation events (not a full restart)

  // Rotation happens per context: only this user's context is replaced, the
  // browser (and every other session) keeps running.
  const key = normalizeUserId(userId);
  const staleSession = sessions.get(key);
  if (staleSession) {
    await staleSession.context.close().catch(() => {});
    sessions.delete(key);
  }

  const session = await getSession(userId);
  const group = getTabGroup(session, sessionKey);
  const page = await session.context.newPage();

  const tabState = createTabState(page);
  tabState.googleRetryCount = (previousTabState.googleRetryCount || 0) + 1;
  tabState.lastRequestedUrl = previousTabState.lastRequestedUrl;
  attachDownloadListener(tabState, tabId, log);
  group.set(tabId, tabState);
  refreshActiveTabsGauge();

  log('warn', 'replaying google search on fresh context (per-context proxy rotation)', {
    reqId,
    tabId,
    retryCount: tabState.googleRetryCount,
    url: tabState.lastRequestedUrl,
    proxySession: session.proxySessionId || null,
  });

  // Visit the home page, pause, then replay the search.
  await withPageLoadDuration('navigate', () =>
    page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }));
  tabState.visitedUrls.add('https://www.google.com/');
  await page.waitForTimeout(1200);
  await withPageLoadDuration('navigate', () =>
    page.goto(tabState.lastRequestedUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
  tabState.visitedUrls.add(tabState.lastRequestedUrl);

  return { session, tabState };
}
|
|
975
|
+
|
|
976
|
+
/** Publish the current total tab count to the active-tabs gauge. */
function refreshActiveTabsGauge() {
  const openTabs = getTotalTabCount();
  activeTabsGauge.set(openTabs);
}
|
|
979
|
+
|
|
980
|
+
/** Sum the queue lengths of every tab lock and publish the total to the gauge. */
function refreshTabLockQueueDepth() {
  const depth = [...tabLocks.values()].reduce(
    (total, lock) => (lock?.queue ? total + lock.queue.length : total),
    0,
  );
  tabLockQueueDepth.set(depth);
}
|
|
987
|
+
|
|
988
|
+
/**
 * Run `fn` and record how long it took in the page-load duration metric.
 * The timer is stopped whether `fn` resolves or throws; the result (or error)
 * of `fn` is passed through unchanged.
 * NOTE(review): `action` is accepted but not used as a label here; confirm
 * whether the metric is meant to be labelled.
 */
async function withPageLoadDuration(action, fn) {
  const stopTimer = pageLoadDuration.startTimer();
  let outcome;
  try {
    outcome = await fn();
  } finally {
    stopTimer();
  }
  return outcome;
}
|
|
996
|
+
|
|
660
997
|
|
|
661
998
|
|
|
662
999
|
async function waitForPageReady(page, options = {}) {
|
|
663
|
-
const {
|
|
1000
|
+
const {
|
|
1001
|
+
timeout = 10000,
|
|
1002
|
+
waitForNetwork = true,
|
|
1003
|
+
waitForHydration = true,
|
|
1004
|
+
settleMs = 200,
|
|
1005
|
+
hydrationPollMs = 250,
|
|
1006
|
+
hydrationTimeoutMs = Math.min(timeout, 10000),
|
|
1007
|
+
} = options;
|
|
664
1008
|
|
|
665
1009
|
try {
|
|
666
1010
|
await page.waitForLoadState('domcontentloaded', { timeout });
|
|
@@ -671,27 +1015,28 @@ async function waitForPageReady(page, options = {}) {
|
|
|
671
1015
|
});
|
|
672
1016
|
}
|
|
673
1017
|
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
1018
|
+
if (waitForHydration) {
|
|
1019
|
+
const maxIterations = Math.max(1, Math.floor(hydrationTimeoutMs / hydrationPollMs));
|
|
1020
|
+
await page.evaluate(async ({ maxIterations, hydrationPollMs }) => {
|
|
1021
|
+
for (let i = 0; i < maxIterations; i++) {
|
|
1022
|
+
const entries = performance.getEntriesByType('resource');
|
|
1023
|
+
const recentEntries = entries.slice(-5);
|
|
1024
|
+
const netQuiet = recentEntries.every(e => (performance.now() - e.responseEnd) > 400);
|
|
1025
|
+
|
|
1026
|
+
if (document.readyState === 'complete' && netQuiet) {
|
|
1027
|
+
await new Promise(r => requestAnimationFrame(() => requestAnimationFrame(r)));
|
|
1028
|
+
break;
|
|
1029
|
+
}
|
|
1030
|
+
await new Promise(r => setTimeout(r, hydrationPollMs));
|
|
687
1031
|
}
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
});
|
|
1032
|
+
}, { maxIterations, hydrationPollMs }).catch(() => {
|
|
1033
|
+
log('warn', 'hydration wait failed, continuing');
|
|
1034
|
+
});
|
|
1035
|
+
}
|
|
693
1036
|
|
|
694
|
-
|
|
1037
|
+
if (settleMs > 0) {
|
|
1038
|
+
await page.waitForTimeout(settleMs);
|
|
1039
|
+
}
|
|
695
1040
|
|
|
696
1041
|
// Auto-dismiss common consent/privacy dialogs
|
|
697
1042
|
await dismissConsentDialogs(page);
|
|
@@ -758,6 +1103,25 @@ function isGoogleSerp(url) {
|
|
|
758
1103
|
}
|
|
759
1104
|
}
|
|
760
1105
|
|
|
1106
|
+
/**
 * Report whether `url` is a Google web search URL: a `/search` path on a host
 * that is `google.<tld>` or one of its subdomains.
 *
 * The host must end in a Google registrable domain (google.com, google.de,
 * google.co.uk, google.com.au, ...). A plain substring test would also accept
 * look-alikes such as `notgoogle.com` or `google.evil.com`.
 *
 * @param {string} url Absolute URL to test.
 * @returns {boolean} False for unparsable URLs and for non-Google hosts.
 */
function isGoogleSearchUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  // URL lowercases the hostname; drop a trailing root dot before matching.
  const host = parsed.hostname.replace(/\.$/, '');
  const isGoogleHost = /(?:^|\.)google\.(?:com|cat|[a-z]{2}|com?\.[a-z]{2})$/.test(host);
  return isGoogleHost && parsed.pathname === '/search';
}
|
|
1114
|
+
|
|
1115
|
+
/**
 * Detect Google's "unusual traffic" interstitial.
 * True when the page URL is under google.com/sorry/, or when the first 600
 * characters of the body text contain one of the known block-page phrases.
 * A missing or closed page, or a failed read, yields false.
 */
async function isGoogleSearchBlocked(page) {
  const usable = Boolean(page) && !page.isClosed();
  if (!usable) {
    return false;
  }

  if (page.url().includes('google.com/sorry/')) {
    return true;
  }

  const blockPhrases = /Our systems have detected unusual traffic|About this page|If you're having trouble accessing Google Search|SG_REL/;
  const excerpt = await page
    .evaluate(() => document.body?.innerText?.slice(0, 600) || '')
    .catch(() => '');
  return blockPhrases.test(excerpt);
}
|
|
1124
|
+
|
|
761
1125
|
// --- Google SERP: combined extraction (refs + snapshot in one DOM pass) ---
|
|
762
1126
|
// Returns { refs: Map, snapshot: string }
|
|
763
1127
|
async function extractGoogleSerp(page) {
|
|
@@ -898,6 +1262,8 @@ async function extractGoogleSerp(page) {
|
|
|
898
1262
|
return { refs, snapshot: extracted.snapshot };
|
|
899
1263
|
}
|
|
900
1264
|
|
|
1265
|
+
const REFRESH_READY_TIMEOUT_MS = 2500;
|
|
1266
|
+
|
|
901
1267
|
async function buildRefs(page) {
|
|
902
1268
|
const refs = new Map();
|
|
903
1269
|
|
|
@@ -916,16 +1282,20 @@ async function buildRefs(page) {
|
|
|
916
1282
|
const start = Date.now();
|
|
917
1283
|
|
|
918
1284
|
// Hard total timeout on the entire buildRefs operation
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
1285
|
+
let timerId;
|
|
1286
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
1287
|
+
timerId = setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS);
|
|
1288
|
+
});
|
|
922
1289
|
|
|
923
1290
|
try {
|
|
924
|
-
|
|
1291
|
+
const result = await Promise.race([
|
|
925
1292
|
_buildRefsInner(page, refs, start),
|
|
926
1293
|
timeoutPromise
|
|
927
1294
|
]);
|
|
1295
|
+
clearTimeout(timerId);
|
|
1296
|
+
return result;
|
|
928
1297
|
} catch (err) {
|
|
1298
|
+
clearTimeout(timerId);
|
|
929
1299
|
if (err.message === 'buildRefs_timeout') {
|
|
930
1300
|
log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
|
|
931
1301
|
return refs;
|
|
@@ -935,7 +1305,12 @@ async function buildRefs(page) {
|
|
|
935
1305
|
}
|
|
936
1306
|
|
|
937
1307
|
async function _buildRefsInner(page, refs, start) {
|
|
938
|
-
await waitForPageReady(page, {
|
|
1308
|
+
await waitForPageReady(page, {
|
|
1309
|
+
timeout: REFRESH_READY_TIMEOUT_MS,
|
|
1310
|
+
waitForNetwork: false,
|
|
1311
|
+
waitForHydration: false,
|
|
1312
|
+
settleMs: 100,
|
|
1313
|
+
});
|
|
939
1314
|
|
|
940
1315
|
// Budget remaining time for ariaSnapshot
|
|
941
1316
|
const elapsed = Date.now() - start;
|
|
@@ -1004,7 +1379,12 @@ async function getAriaSnapshot(page) {
|
|
|
1004
1379
|
if (!page || page.isClosed()) {
|
|
1005
1380
|
return null;
|
|
1006
1381
|
}
|
|
1007
|
-
await waitForPageReady(page, {
|
|
1382
|
+
await waitForPageReady(page, {
|
|
1383
|
+
timeout: REFRESH_READY_TIMEOUT_MS,
|
|
1384
|
+
waitForNetwork: false,
|
|
1385
|
+
waitForHydration: false,
|
|
1386
|
+
settleMs: 100,
|
|
1387
|
+
});
|
|
1008
1388
|
try {
|
|
1009
1389
|
return await page.locator('body').ariaSnapshot({ timeout: 5000 });
|
|
1010
1390
|
} catch (err) {
|
|
@@ -1027,11 +1407,46 @@ function refToLocator(page, ref, refs) {
|
|
|
1027
1407
|
return locator;
|
|
1028
1408
|
}
|
|
1029
1409
|
|
|
1410
|
+
/**
 * Rebuild the element refs for a tab via `buildRefs`, optionally bounded by a
 * caller-supplied timeout.
 *
 * If the rebuild yields an empty map while the tab previously had refs and the
 * page URL did not change during the rebuild, the previous refs are returned
 * instead (unless `preserveExistingOnEmpty` is false).
 *
 * @param {object} tabState - Tab state; reads `tabState.page` and `tabState.refs`.
 * @param {object} [options]
 * @param {string} [options.reason='refresh'] - Label used in the log entry and in
 *   the timeout error message (`${reason}_refs_timeout`).
 * @param {?number} [options.timeoutMs=null] - Upper bound for the rebuild in ms.
 *   `null`/`undefined` means unbounded here; `0` or a negative value means the
 *   budget is already spent and the call times out on the next timer tick.
 * @param {boolean} [options.preserveExistingOnEmpty=true] - Keep the old refs when
 *   the rebuild comes back empty on the same URL.
 * @returns {Promise<Map>} The refreshed refs, or the preserved previous refs.
 * @throws {Error} `${reason}_refs_timeout` when `timeoutMs` elapses first; any
 *   rejection from `buildRefs` also propagates.
 */
async function refreshTabRefs(tabState, options = {}) {
  const {
    reason = 'refresh',
    timeoutMs = null,
    preserveExistingOnEmpty = true,
  } = options;

  const beforeUrl = tabState.page?.url?.() || '';
  const existingRefs = tabState.refs instanceof Map ? tabState.refs : new Map();
  const refreshPromise = buildRefs(tabState.page);

  let refreshedRefs;
  // A finite number, including 0, is a real budget. Testing truthiness instead
  // would turn an exhausted budget of 0 into "wait without limit".
  if (Number.isFinite(timeoutMs)) {
    let timerId;
    const timeoutPromise = new Promise((_, reject) => {
      timerId = setTimeout(() => reject(new Error(`${reason}_refs_timeout`)), timeoutMs);
    });
    try {
      refreshedRefs = await Promise.race([refreshPromise, timeoutPromise]);
    } finally {
      // Always disarm the timer so a fast rebuild does not leave it pending
      // (mirrors the cleanup done in buildRefs).
      clearTimeout(timerId);
    }
  } else {
    refreshedRefs = await refreshPromise;
  }

  const afterUrl = tabState.page?.url?.() || beforeUrl;
  const rebuildCameBackEmpty = refreshedRefs.size === 0 && existingRefs.size > 0;
  if (preserveExistingOnEmpty && rebuildCameBackEmpty && beforeUrl === afterUrl) {
    log('warn', 'preserving previous refs after empty rebuild', {
      reason,
      url: afterUrl,
      previousRefs: existingRefs.size,
    });
    return existingRefs;
  }

  return refreshedRefs;
}
|
|
1444
|
+
|
|
1030
1445
|
// --- YouTube transcript ---
|
|
1031
1446
|
// Implementation extracted to lib/youtube.js to avoid scanner false positives
|
|
1032
1447
|
// (child_process + app.post in same file triggers OpenClaw skill-scanner)
|
|
1033
1448
|
|
|
1034
|
-
detectYtDlp(log);
|
|
1449
|
+
await detectYtDlp(log);
|
|
1035
1450
|
|
|
1036
1451
|
app.post('/youtube/transcript', async (req, res) => {
|
|
1037
1452
|
const reqId = req.reqId;
|
|
@@ -1051,14 +1466,23 @@ app.post('/youtube/transcript', async (req, res) => {
|
|
|
1051
1466
|
const videoId = videoIdMatch[1];
|
|
1052
1467
|
const lang = languages[0] || 'en';
|
|
1053
1468
|
|
|
1054
|
-
|
|
1469
|
+
// Re-detect yt-dlp if startup detection failed (transient issue)
|
|
1470
|
+
await ensureYtDlp(log);
|
|
1471
|
+
|
|
1472
|
+
const ytDlpProxyUrl = buildProxyUrl(proxyPool, CONFIG.proxy);
|
|
1473
|
+
log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser', hasProxy: !!ytDlpProxyUrl });
|
|
1055
1474
|
|
|
1056
1475
|
let result;
|
|
1057
1476
|
if (hasYtDlp()) {
|
|
1058
1477
|
try {
|
|
1059
|
-
result = await ytDlpTranscript(reqId, url, videoId, lang);
|
|
1478
|
+
result = await ytDlpTranscript(reqId, url, videoId, lang, ytDlpProxyUrl);
|
|
1060
1479
|
} catch (ytErr) {
|
|
1061
|
-
log('warn', 'yt-dlp
|
|
1480
|
+
log('warn', 'yt-dlp threw, falling back to browser', { reqId, error: ytErr.message });
|
|
1481
|
+
result = null;
|
|
1482
|
+
}
|
|
1483
|
+
// If yt-dlp returned an error result (e.g. no captions) or threw, try browser
|
|
1484
|
+
if (!result || result.status !== 'ok') {
|
|
1485
|
+
if (result) log('warn', 'yt-dlp returned error, falling back to browser', { reqId, status: result.status, code: result.code });
|
|
1062
1486
|
result = await browserTranscript(reqId, url, videoId, lang);
|
|
1063
1487
|
}
|
|
1064
1488
|
} else {
|
|
@@ -1068,6 +1492,7 @@ app.post('/youtube/transcript', async (req, res) => {
|
|
|
1068
1492
|
log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
|
|
1069
1493
|
res.json(result);
|
|
1070
1494
|
} catch (err) {
|
|
1495
|
+
failuresTotal.labels(classifyError(err), 'youtube_transcript').inc();
|
|
1071
1496
|
log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
|
|
1072
1497
|
res.status(500).json({ error: safeError(err) });
|
|
1073
1498
|
}
|
|
@@ -1186,6 +1611,16 @@ async function browserTranscript(reqId, url, videoId, lang) {
|
|
|
1186
1611
|
};
|
|
1187
1612
|
} finally {
|
|
1188
1613
|
await safePageClose(page);
|
|
1614
|
+
// Clean up phantom transcript session if no tabs remain
|
|
1615
|
+
const ytSession = sessions.get(normalizeUserId('__yt_transcript__'));
|
|
1616
|
+
if (ytSession) {
|
|
1617
|
+
let totalTabs = 0;
|
|
1618
|
+
for (const g of ytSession.tabGroups.values()) totalTabs += g.size;
|
|
1619
|
+
if (totalTabs === 0) {
|
|
1620
|
+
ytSession.context.close().catch(() => {});
|
|
1621
|
+
sessions.delete(normalizeUserId('__yt_transcript__'));
|
|
1622
|
+
}
|
|
1623
|
+
}
|
|
1189
1624
|
}
|
|
1190
1625
|
});
|
|
1191
1626
|
}
|
|
@@ -1195,16 +1630,34 @@ app.get('/health', (req, res) => {
|
|
|
1195
1630
|
return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
|
|
1196
1631
|
}
|
|
1197
1632
|
const running = browser !== null && (browser.isConnected?.() ?? false);
|
|
1633
|
+
if (proxyPool?.canRotateSessions && !running) {
|
|
1634
|
+
scheduleBrowserWarmRetry();
|
|
1635
|
+
return res.status(503).json({
|
|
1636
|
+
ok: false,
|
|
1637
|
+
engine: 'camoufox',
|
|
1638
|
+
browserConnected: false,
|
|
1639
|
+
browserRunning: false,
|
|
1640
|
+
warming: true,
|
|
1641
|
+
...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
|
|
1642
|
+
});
|
|
1643
|
+
}
|
|
1198
1644
|
res.json({
|
|
1199
1645
|
ok: true,
|
|
1200
1646
|
engine: 'camoufox',
|
|
1201
1647
|
browserConnected: running,
|
|
1202
1648
|
browserRunning: running,
|
|
1203
1649
|
activeTabs: getTotalTabCount(),
|
|
1650
|
+
activeSessions: sessions.size,
|
|
1204
1651
|
consecutiveFailures: healthState.consecutiveNavFailures,
|
|
1652
|
+
...(FLY_MACHINE_ID ? { machineId: FLY_MACHINE_ID } : {}),
|
|
1205
1653
|
});
|
|
1206
1654
|
});
|
|
1207
1655
|
|
|
1656
|
+
// GET /metrics — respond with the serialized contents of the metrics registry,
// using the registry's own content type.
app.get('/metrics', async (_req, res) => {
  try {
    const body = await metricsRegister.metrics();
    res.set('Content-Type', metricsRegister.contentType);
    res.send(body);
  } catch (err) {
    // Express 4 does not forward a rejected async handler to its error
    // middleware, so answer explicitly rather than leaving the request unanswered.
    log('error', 'metrics failed', { error: err.message });
    res.status(500).json({ error: safeError(err) });
  }
});
|
|
1660
|
+
|
|
1208
1661
|
// Create new tab
|
|
1209
1662
|
app.post('/tabs', async (req, res) => {
|
|
1210
1663
|
try {
|
|
@@ -1220,32 +1673,35 @@ app.post('/tabs', async (req, res) => {
|
|
|
1220
1673
|
|
|
1221
1674
|
let totalTabs = 0;
|
|
1222
1675
|
for (const group of session.tabGroups.values()) totalTabs += group.size;
|
|
1223
|
-
if (totalTabs >= MAX_TABS_PER_SESSION) {
|
|
1224
|
-
throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
|
|
1225
|
-
}
|
|
1226
1676
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1677
|
+
// Recycle oldest tab when limits are reached instead of rejecting
|
|
1678
|
+
if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
|
|
1679
|
+
const recycled = await recycleOldestTab(session, req.reqId);
|
|
1680
|
+
if (!recycled) {
|
|
1681
|
+
throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
|
|
1682
|
+
}
|
|
1229
1683
|
}
|
|
1230
1684
|
|
|
1231
1685
|
const group = getTabGroup(session, resolvedSessionKey);
|
|
1232
1686
|
|
|
1233
1687
|
const page = await session.context.newPage();
|
|
1234
|
-
const tabId =
|
|
1688
|
+
const tabId = fly.makeTabId();
|
|
1235
1689
|
const tabState = createTabState(page);
|
|
1236
1690
|
attachDownloadListener(tabState, tabId);
|
|
1237
1691
|
group.set(tabId, tabState);
|
|
1692
|
+
refreshActiveTabsGauge();
|
|
1238
1693
|
|
|
1239
1694
|
if (url) {
|
|
1240
1695
|
const urlErr = validateUrl(url);
|
|
1241
1696
|
if (urlErr) throw Object.assign(new Error(urlErr), { statusCode: 400 });
|
|
1242
|
-
|
|
1697
|
+
tabState.lastRequestedUrl = url;
|
|
1698
|
+
await withPageLoadDuration('open_url', () => page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
|
|
1243
1699
|
tabState.visitedUrls.add(url);
|
|
1244
1700
|
}
|
|
1245
1701
|
|
|
1246
1702
|
log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
|
|
1247
1703
|
return { tabId, url: page.url() };
|
|
1248
|
-
})(),
|
|
1704
|
+
})(), requestTimeoutMs(), 'tab create');
|
|
1249
1705
|
|
|
1250
1706
|
res.json(result);
|
|
1251
1707
|
} catch (err) {
|
|
@@ -1264,45 +1720,29 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1264
1720
|
|
|
1265
1721
|
const result = await withUserLimit(userId, () => withTimeout((async () => {
|
|
1266
1722
|
await ensureBrowser();
|
|
1723
|
+
const resolvedSessionKey = sessionKey || listItemId || 'default';
|
|
1267
1724
|
let session = sessions.get(normalizeUserId(userId));
|
|
1268
1725
|
let found = session && findTab(session, tabId);
|
|
1269
1726
|
|
|
1270
1727
|
let tabState;
|
|
1271
1728
|
if (!found) {
|
|
1272
|
-
const resolvedSessionKey = sessionKey || listItemId || 'default';
|
|
1273
1729
|
session = await getSession(userId);
|
|
1274
1730
|
let sessionTabs = 0;
|
|
1275
1731
|
for (const g of session.tabGroups.values()) sessionTabs += g.size;
|
|
1276
1732
|
if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
|
|
1277
|
-
//
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
let oldestTabId = null;
|
|
1281
|
-
for (const [gKey, group] of session.tabGroups) {
|
|
1282
|
-
for (const [tid, ts] of group) {
|
|
1283
|
-
if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
|
|
1284
|
-
oldestTab = ts;
|
|
1285
|
-
oldestGroup = group;
|
|
1286
|
-
oldestTabId = tid;
|
|
1287
|
-
}
|
|
1288
|
-
}
|
|
1289
|
-
}
|
|
1290
|
-
if (oldestTab) {
|
|
1291
|
-
tabState = oldestTab;
|
|
1292
|
-
const group = getTabGroup(session, resolvedSessionKey);
|
|
1293
|
-
if (oldestGroup) oldestGroup.delete(oldestTabId);
|
|
1294
|
-
group.set(tabId, tabState);
|
|
1295
|
-
{ const _l = tabLocks.get(oldestTabId); if (_l) _l.drain(); tabLocks.delete(oldestTabId); }
|
|
1296
|
-
log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
|
|
1297
|
-
} else {
|
|
1733
|
+
// Recycle oldest tab to free a slot, then create new page
|
|
1734
|
+
const recycled = await recycleOldestTab(session, req.reqId);
|
|
1735
|
+
if (!recycled) {
|
|
1298
1736
|
throw new Error('Maximum tabs per session reached');
|
|
1299
1737
|
}
|
|
1300
|
-
}
|
|
1738
|
+
}
|
|
1739
|
+
{
|
|
1301
1740
|
const page = await session.context.newPage();
|
|
1302
1741
|
tabState = createTabState(page);
|
|
1303
1742
|
attachDownloadListener(tabState, tabId, log);
|
|
1304
1743
|
const group = getTabGroup(session, resolvedSessionKey);
|
|
1305
1744
|
group.set(tabId, tabState);
|
|
1745
|
+
refreshActiveTabsGauge();
|
|
1306
1746
|
log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
|
|
1307
1747
|
}
|
|
1308
1748
|
} else {
|
|
@@ -1311,7 +1751,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1311
1751
|
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1312
1752
|
|
|
1313
1753
|
let targetUrl = url;
|
|
1314
|
-
if (macro) {
|
|
1754
|
+
if (macro && macro !== '__NO__' && macro !== 'none' && macro !== 'null') {
|
|
1315
1755
|
targetUrl = expandMacro(macro, query) || url;
|
|
1316
1756
|
}
|
|
1317
1757
|
|
|
@@ -1321,9 +1761,61 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1321
1761
|
if (urlErr) throw new Error(urlErr);
|
|
1322
1762
|
|
|
1323
1763
|
return await withTabLock(tabId, async () => {
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1764
|
+
const currentSessionKey = found?.listItemId || resolvedSessionKey;
|
|
1765
|
+
const isGoogleSearch = isGoogleSearchUrl(targetUrl);
|
|
1766
|
+
|
|
1767
|
+
const navigateCurrentPage = async () => {
|
|
1768
|
+
tabState.lastRequestedUrl = targetUrl;
|
|
1769
|
+
await withPageLoadDuration('navigate', () => tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }));
|
|
1770
|
+
tabState.visitedUrls.add(targetUrl);
|
|
1771
|
+
tabState.lastSnapshot = null;
|
|
1772
|
+
};
|
|
1773
|
+
|
|
1774
|
+
const prewarmGoogleHome = async () => {
|
|
1775
|
+
if (!isGoogleSearch || tabState.visitedUrls.has('https://www.google.com/')) return;
|
|
1776
|
+
await withPageLoadDuration('navigate', () => tabState.page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }));
|
|
1777
|
+
tabState.visitedUrls.add('https://www.google.com/');
|
|
1778
|
+
await tabState.page.waitForTimeout(1200);
|
|
1779
|
+
};
|
|
1780
|
+
|
|
1781
|
+
const recreateTabOnFreshContext = async () => {
|
|
1782
|
+
const previousRetryCount = tabState.googleRetryCount || 0;
|
|
1783
|
+
browserRestartsTotal.labels('google_search_block').inc();
|
|
1784
|
+
// Rotate at context level — destroy this user's session and create
|
|
1785
|
+
// a fresh one with a new proxy session. Does NOT restart the browser.
|
|
1786
|
+
const key = normalizeUserId(userId);
|
|
1787
|
+
const oldSession = sessions.get(key);
|
|
1788
|
+
if (oldSession) {
|
|
1789
|
+
await oldSession.context.close().catch(() => {});
|
|
1790
|
+
sessions.delete(key);
|
|
1791
|
+
}
|
|
1792
|
+
session = await getSession(userId);
|
|
1793
|
+
const group = getTabGroup(session, currentSessionKey);
|
|
1794
|
+
const page = await session.context.newPage();
|
|
1795
|
+
tabState = createTabState(page);
|
|
1796
|
+
tabState.googleRetryCount = previousRetryCount + 1;
|
|
1797
|
+
attachDownloadListener(tabState, tabId, log);
|
|
1798
|
+
group.set(tabId, tabState);
|
|
1799
|
+
refreshActiveTabsGauge();
|
|
1800
|
+
};
|
|
1801
|
+
|
|
1802
|
+
if (isGoogleSearch && proxyPool?.canRotateSessions) {
|
|
1803
|
+
await prewarmGoogleHome();
|
|
1804
|
+
}
|
|
1805
|
+
|
|
1806
|
+
await navigateCurrentPage();
|
|
1807
|
+
|
|
1808
|
+
if (isGoogleSearch && proxyPool?.canRotateSessions && await isGoogleSearchBlocked(tabState.page)) {
|
|
1809
|
+
log('warn', 'google search blocked, rotating browser proxy session', {
|
|
1810
|
+
reqId: req.reqId,
|
|
1811
|
+
tabId,
|
|
1812
|
+
url: tabState.page.url(),
|
|
1813
|
+
proxySession: browserLaunchProxy?.sessionId || null,
|
|
1814
|
+
});
|
|
1815
|
+
await recreateTabOnFreshContext();
|
|
1816
|
+
await prewarmGoogleHome();
|
|
1817
|
+
await navigateCurrentPage();
|
|
1818
|
+
}
|
|
1327
1819
|
|
|
1328
1820
|
// For Google SERP: skip eager ref building during navigate.
|
|
1329
1821
|
// Results render asynchronously after DOMContentLoaded — the snapshot
|
|
@@ -1332,18 +1824,22 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1332
1824
|
tabState.refs = new Map();
|
|
1333
1825
|
return { ok: true, tabId, url: tabState.page.url(), refsAvailable: false, googleSerp: true };
|
|
1334
1826
|
}
|
|
1827
|
+
|
|
1828
|
+
if (isGoogleSearch && await isGoogleSearchBlocked(tabState.page)) {
|
|
1829
|
+
return { ok: false, tabId, url: tabState.page.url(), refsAvailable: false, googleBlocked: true };
|
|
1830
|
+
}
|
|
1335
1831
|
|
|
1336
1832
|
tabState.refs = await buildRefs(tabState.page);
|
|
1337
1833
|
return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
|
|
1338
|
-
});
|
|
1339
|
-
})(),
|
|
1834
|
+
}, requestTimeoutMs());
|
|
1835
|
+
})(), requestTimeoutMs(), 'navigate'));
|
|
1340
1836
|
|
|
1341
1837
|
log('info', 'navigated', { reqId: req.reqId, tabId, url: result.url });
|
|
1342
1838
|
res.json(result);
|
|
1343
1839
|
} catch (err) {
|
|
1344
1840
|
log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
|
|
1345
|
-
const
|
|
1346
|
-
if (
|
|
1841
|
+
const is400 = err.message && (err.message.startsWith('Blocked URL scheme') || err.message === 'url or macro required');
|
|
1842
|
+
if (is400) {
|
|
1347
1843
|
return res.status(400).json({ error: safeError(err) });
|
|
1348
1844
|
}
|
|
1349
1845
|
handleRouteError(err, req, res);
|
|
@@ -1377,6 +1873,25 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
1377
1873
|
}
|
|
1378
1874
|
|
|
1379
1875
|
const result = await withUserLimit(userId, () => withTimeout((async () => {
|
|
1876
|
+
if (proxyPool?.canRotateSessions && isGoogleSearchUrl(tabState.lastRequestedUrl || '')) {
|
|
1877
|
+
const blocked = await isGoogleSearchBlocked(tabState.page);
|
|
1878
|
+
const unavailable = !blocked && await isGoogleUnavailable(tabState.page);
|
|
1879
|
+
if (blocked || unavailable) {
|
|
1880
|
+
const rotated = await rotateGoogleTab(userId, found.listItemId, req.params.tabId, tabState, blocked ? 'google_search_block_snapshot' : 'google_search_unavailable_snapshot', req.reqId);
|
|
1881
|
+
if (rotated) {
|
|
1882
|
+
tabState.page = rotated.tabState.page;
|
|
1883
|
+
tabState.refs = rotated.tabState.refs;
|
|
1884
|
+
tabState.visitedUrls = rotated.tabState.visitedUrls;
|
|
1885
|
+
tabState.downloads = rotated.tabState.downloads;
|
|
1886
|
+
tabState.toolCalls = rotated.tabState.toolCalls;
|
|
1887
|
+
tabState.consecutiveTimeouts = rotated.tabState.consecutiveTimeouts;
|
|
1888
|
+
tabState.lastSnapshot = rotated.tabState.lastSnapshot;
|
|
1889
|
+
tabState.lastRequestedUrl = rotated.tabState.lastRequestedUrl;
|
|
1890
|
+
tabState.googleRetryCount = rotated.tabState.googleRetryCount;
|
|
1891
|
+
}
|
|
1892
|
+
}
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1380
1895
|
const pageUrl = tabState.page.url();
|
|
1381
1896
|
|
|
1382
1897
|
// Google SERP fast path — DOM extraction instead of ariaSnapshot
|
|
@@ -1402,7 +1917,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
1402
1917
|
return response;
|
|
1403
1918
|
}
|
|
1404
1919
|
|
|
1405
|
-
tabState.refs = await
|
|
1920
|
+
tabState.refs = await refreshTabRefs(tabState, { reason: 'snapshot' });
|
|
1406
1921
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
1407
1922
|
|
|
1408
1923
|
let annotatedYaml = ariaYaml || '';
|
|
@@ -1458,7 +1973,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
1458
1973
|
}
|
|
1459
1974
|
|
|
1460
1975
|
return response;
|
|
1461
|
-
})(),
|
|
1976
|
+
})(), requestTimeoutMs(), 'snapshot'));
|
|
1462
1977
|
|
|
1463
1978
|
log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
|
|
1464
1979
|
res.json(result);
|
|
@@ -1576,9 +2091,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
1576
2091
|
log('info', 'auto-refreshing refs before click', { ref, hadRefs: tabState.refs.size });
|
|
1577
2092
|
try {
|
|
1578
2093
|
const preClickBudget = Math.min(4000, remainingBudget());
|
|
1579
|
-
|
|
1580
|
-
const refreshBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('pre_click_refs_timeout')), preClickBudget));
|
|
1581
|
-
tabState.refs = await Promise.race([refreshPromise, refreshBudget]);
|
|
2094
|
+
tabState.refs = await refreshTabRefs(tabState, { reason: 'pre_click', timeoutMs: preClickBudget });
|
|
1582
2095
|
} catch (e) {
|
|
1583
2096
|
if (e.message === 'pre_click_refs_timeout' || e.message === 'buildRefs_timeout') {
|
|
1584
2097
|
log('warn', 'pre-click buildRefs timed out, proceeding without refresh');
|
|
@@ -1618,9 +2131,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
1618
2131
|
// If it times out, return without refs (caller's next /snapshot will rebuild them).
|
|
1619
2132
|
const postClickBudget = Math.max(2000, remainingBudget());
|
|
1620
2133
|
try {
|
|
1621
|
-
|
|
1622
|
-
const refsBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('post_click_refs_timeout')), postClickBudget));
|
|
1623
|
-
tabState.refs = await Promise.race([refsPromise, refsBudget]);
|
|
2134
|
+
tabState.refs = await refreshTabRefs(tabState, { reason: 'post_click', timeoutMs: postClickBudget });
|
|
1624
2135
|
} catch (e) {
|
|
1625
2136
|
if (e.message === 'post_click_refs_timeout' || e.message === 'buildRefs_timeout') {
|
|
1626
2137
|
log('warn', 'post-click buildRefs timed out, returning without refs', { budget: postClickBudget, elapsed: Date.now() - clickStart });
|
|
@@ -1644,7 +2155,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
1644
2155
|
const session = sessions.get(normalizeUserId(req.body.userId));
|
|
1645
2156
|
const found = session && findTab(session, tabId);
|
|
1646
2157
|
if (found?.tabState?.page && !found.tabState.page.isClosed()) {
|
|
1647
|
-
found.tabState.refs = await
|
|
2158
|
+
found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'click_timeout' });
|
|
1648
2159
|
found.tabState.lastSnapshot = null;
|
|
1649
2160
|
return res.status(500).json({
|
|
1650
2161
|
error: safeError(err),
|
|
@@ -1683,7 +2194,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
|
|
|
1683
2194
|
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1684
2195
|
if (!locator) {
|
|
1685
2196
|
log('info', 'auto-refreshing refs before fill', { ref, hadRefs: tabState.refs.size });
|
|
1686
|
-
tabState.refs = await
|
|
2197
|
+
tabState.refs = await refreshTabRefs(tabState, { reason: 'type' });
|
|
1687
2198
|
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1688
2199
|
}
|
|
1689
2200
|
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
@@ -1701,7 +2212,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
|
|
|
1701
2212
|
const session = sessions.get(normalizeUserId(req.body.userId));
|
|
1702
2213
|
const found = session && findTab(session, tabId);
|
|
1703
2214
|
if (found?.tabState?.page && !found.tabState.page.isClosed()) {
|
|
1704
|
-
found.tabState.refs = await
|
|
2215
|
+
found.tabState.refs = await refreshTabRefs(found.tabState, { reason: 'type_timeout' });
|
|
1705
2216
|
found.tabState.lastSnapshot = null;
|
|
1706
2217
|
return res.status(500).json({
|
|
1707
2218
|
error: safeError(err),
|
|
@@ -1753,8 +2264,9 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
|
|
|
1753
2264
|
const { tabState } = found;
|
|
1754
2265
|
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1755
2266
|
|
|
1756
|
-
const
|
|
1757
|
-
|
|
2267
|
+
const isVertical = direction === 'up' || direction === 'down';
|
|
2268
|
+
const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
|
|
2269
|
+
await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
|
|
1758
2270
|
await tabState.page.waitForTimeout(300);
|
|
1759
2271
|
|
|
1760
2272
|
res.json({ ok: true });
|
|
@@ -1778,7 +2290,17 @@ app.post('/tabs/:tabId/back', async (req, res) => {
|
|
|
1778
2290
|
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1779
2291
|
|
|
1780
2292
|
const result = await withTabLock(tabId, async () => {
|
|
1781
|
-
|
|
2293
|
+
try {
|
|
2294
|
+
await tabState.page.goBack({ timeout: 10000 });
|
|
2295
|
+
} catch (navErr) {
|
|
2296
|
+
// NS_BINDING_CANCELLED_OLD_LOAD: Firefox cancels the old load when going back.
|
|
2297
|
+
// The navigation itself succeeded — just the prior page's load was interrupted.
|
|
2298
|
+
if (navErr.message && navErr.message.includes('NS_BINDING_CANCELLED')) {
|
|
2299
|
+
log('info', 'goBack cancelled old load (expected)', { reqId: req.reqId, tabId });
|
|
2300
|
+
} else {
|
|
2301
|
+
throw navErr;
|
|
2302
|
+
}
|
|
2303
|
+
}
|
|
1782
2304
|
tabState.refs = await buildRefs(tabState.page);
|
|
1783
2305
|
return { ok: true, url: tabState.page.url() };
|
|
1784
2306
|
});
|
|
@@ -1906,6 +2428,7 @@ app.get('/tabs/:tabId/downloads', async (req, res) => {
|
|
|
1906
2428
|
|
|
1907
2429
|
res.json({ tabId: req.params.tabId, downloads });
|
|
1908
2430
|
} catch (err) {
|
|
2431
|
+
failuresTotal.labels(classifyError(err), 'downloads').inc();
|
|
1909
2432
|
log('error', 'downloads failed', { reqId: req.reqId, error: err.message });
|
|
1910
2433
|
res.status(500).json({ error: safeError(err) });
|
|
1911
2434
|
}
|
|
@@ -1931,6 +2454,7 @@ app.get('/tabs/:tabId/images', async (req, res) => {
|
|
|
1931
2454
|
|
|
1932
2455
|
res.json({ tabId: req.params.tabId, images });
|
|
1933
2456
|
} catch (err) {
|
|
2457
|
+
failuresTotal.labels(classifyError(err), 'images').inc();
|
|
1934
2458
|
log('error', 'images failed', { reqId: req.reqId, error: err.message });
|
|
1935
2459
|
res.status(500).json({ error: safeError(err) });
|
|
1936
2460
|
}
|
|
@@ -1999,6 +2523,7 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
|
|
|
1999
2523
|
log('info', 'evaluate', { reqId: req.reqId, tabId: req.params.tabId, userId, resultType: typeof result });
|
|
2000
2524
|
res.json({ ok: true, result });
|
|
2001
2525
|
} catch (err) {
|
|
2526
|
+
failuresTotal.labels(classifyError(err), 'evaluate').inc();
|
|
2002
2527
|
log('error', 'evaluate failed', { reqId: req.reqId, error: err.message });
|
|
2003
2528
|
res.status(500).json({ error: safeError(err) });
|
|
2004
2529
|
}
|
|
@@ -2007,17 +2532,19 @@ app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, re
|
|
|
2007
2532
|
// Close tab
|
|
2008
2533
|
app.delete('/tabs/:tabId', async (req, res) => {
|
|
2009
2534
|
try {
|
|
2010
|
-
const
|
|
2535
|
+
const userId = req.query.userId || req.body?.userId;
|
|
2536
|
+
if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
|
|
2011
2537
|
const session = sessions.get(normalizeUserId(userId));
|
|
2012
2538
|
const found = session && findTab(session, req.params.tabId);
|
|
2013
2539
|
if (found) {
|
|
2014
2540
|
await clearTabDownloads(found.tabState);
|
|
2015
2541
|
await safePageClose(found.tabState.page);
|
|
2016
2542
|
found.group.delete(req.params.tabId);
|
|
2017
|
-
{ const _l = tabLocks.get(req.params.tabId); if (_l) _l.drain(); tabLocks.delete(req.params.tabId); }
|
|
2543
|
+
{ const _l = tabLocks.get(req.params.tabId); if (_l) _l.drain(); tabLocks.delete(req.params.tabId); refreshTabLockQueueDepth(); }
|
|
2018
2544
|
if (found.group.size === 0) {
|
|
2019
2545
|
session.tabGroups.delete(found.listItemId);
|
|
2020
2546
|
}
|
|
2547
|
+
refreshActiveTabsGauge();
|
|
2021
2548
|
log('info', 'tab closed', { reqId: req.reqId, tabId: req.params.tabId, userId });
|
|
2022
2549
|
}
|
|
2023
2550
|
res.json({ ok: true });
|
|
@@ -2030,16 +2557,23 @@ app.delete('/tabs/:tabId', async (req, res) => {
|
|
|
2030
2557
|
// Close tab group
|
|
2031
2558
|
app.delete('/tabs/group/:listItemId', async (req, res) => {
|
|
2032
2559
|
try {
|
|
2033
|
-
const
|
|
2560
|
+
const userId = req.query.userId || req.body?.userId;
|
|
2561
|
+
if (!userId) return res.status(400).json({ error: 'userId required (query or body)' });
|
|
2034
2562
|
const session = sessions.get(normalizeUserId(userId));
|
|
2035
2563
|
const group = session?.tabGroups.get(req.params.listItemId);
|
|
2036
2564
|
if (group) {
|
|
2037
2565
|
for (const [tabId, tabState] of group) {
|
|
2038
2566
|
await clearTabDownloads(tabState);
|
|
2039
2567
|
await safePageClose(tabState.page);
|
|
2040
|
-
tabLocks.
|
|
2568
|
+
const lock = tabLocks.get(tabId);
|
|
2569
|
+
if (lock) {
|
|
2570
|
+
lock.drain();
|
|
2571
|
+
tabLocks.delete(tabId);
|
|
2572
|
+
}
|
|
2041
2573
|
}
|
|
2042
2574
|
session.tabGroups.delete(req.params.listItemId);
|
|
2575
|
+
refreshTabLockQueueDepth();
|
|
2576
|
+
refreshActiveTabsGauge();
|
|
2043
2577
|
log('info', 'tab group closed', { reqId: req.reqId, listItemId: req.params.listItemId, userId });
|
|
2044
2578
|
}
|
|
2045
2579
|
res.json({ ok: true });
|
|
@@ -2058,6 +2592,18 @@ app.delete('/sessions/:userId', async (req, res) => {
|
|
|
2058
2592
|
await clearSessionDownloads(session);
|
|
2059
2593
|
await session.context.close();
|
|
2060
2594
|
sessions.delete(userId);
|
|
2595
|
+
// Remove any lingering tab locks for the session
|
|
2596
|
+
for (const [listItemId, group] of session.tabGroups) {
|
|
2597
|
+
for (const tabId of group.keys()) {
|
|
2598
|
+
const lock = tabLocks.get(tabId);
|
|
2599
|
+
if (lock) {
|
|
2600
|
+
lock.drain();
|
|
2601
|
+
tabLocks.delete(tabId);
|
|
2602
|
+
}
|
|
2603
|
+
}
|
|
2604
|
+
}
|
|
2605
|
+
refreshTabLockQueueDepth();
|
|
2606
|
+
refreshActiveTabsGauge();
|
|
2061
2607
|
log('info', 'session closed', { userId });
|
|
2062
2608
|
}
|
|
2063
2609
|
if (sessions.size === 0) scheduleBrowserIdleShutdown();
|
|
@@ -2073,9 +2619,11 @@ setInterval(() => {
|
|
|
2073
2619
|
const now = Date.now();
|
|
2074
2620
|
for (const [userId, session] of sessions) {
|
|
2075
2621
|
if (now - session.lastAccess > SESSION_TIMEOUT_MS) {
|
|
2622
|
+
sessionsExpiredTotal.inc();
|
|
2076
2623
|
clearSessionDownloads(session).catch(() => {});
|
|
2077
2624
|
session.context.close().catch(() => {});
|
|
2078
2625
|
sessions.delete(userId);
|
|
2626
|
+
refreshActiveTabsGauge();
|
|
2079
2627
|
log('info', 'session expired', { userId });
|
|
2080
2628
|
}
|
|
2081
2629
|
}
|
|
@@ -2083,6 +2631,7 @@ setInterval(() => {
|
|
|
2083
2631
|
if (sessions.size === 0) {
|
|
2084
2632
|
scheduleBrowserIdleShutdown();
|
|
2085
2633
|
}
|
|
2634
|
+
refreshTabLockQueueDepth();
|
|
2086
2635
|
}, 60_000);
|
|
2087
2636
|
|
|
2088
2637
|
// Per-tab inactivity reaper — close tabs idle for TAB_INACTIVITY_MS
|
|
@@ -2099,10 +2648,13 @@ setInterval(() => {
|
|
|
2099
2648
|
if (tabState.toolCalls === tabState._lastReaperToolCalls) {
|
|
2100
2649
|
const idleMs = now - tabState._lastReaperCheck;
|
|
2101
2650
|
if (idleMs >= TAB_INACTIVITY_MS) {
|
|
2651
|
+
tabsReapedTotal.inc();
|
|
2102
2652
|
log('info', 'tab reaped (inactive)', { userId, tabId, listItemId, idleMs, toolCalls: tabState.toolCalls });
|
|
2103
2653
|
safePageClose(tabState.page);
|
|
2104
2654
|
group.delete(tabId);
|
|
2105
2655
|
{ const _l = tabLocks.get(tabId); if (_l) _l.drain(); tabLocks.delete(tabId); }
|
|
2656
|
+
refreshTabLockQueueDepth();
|
|
2657
|
+
refreshActiveTabsGauge();
|
|
2106
2658
|
}
|
|
2107
2659
|
} else {
|
|
2108
2660
|
tabState._lastReaperCheck = now;
|
|
@@ -2180,26 +2732,26 @@ app.post('/tabs/open', async (req, res) => {
|
|
|
2180
2732
|
|
|
2181
2733
|
const session = await getSession(userId);
|
|
2182
2734
|
|
|
2183
|
-
//
|
|
2184
|
-
if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
|
|
2185
|
-
return res.status(429).json({ error: 'Maximum global tabs reached' });
|
|
2186
|
-
}
|
|
2187
|
-
|
|
2735
|
+
// Recycle oldest tab when limits are reached instead of rejecting
|
|
2188
2736
|
let totalTabs = 0;
|
|
2189
2737
|
for (const g of session.tabGroups.values()) totalTabs += g.size;
|
|
2190
|
-
if (totalTabs >= MAX_TABS_PER_SESSION) {
|
|
2191
|
-
|
|
2738
|
+
if (totalTabs >= MAX_TABS_PER_SESSION || getTotalTabCount() >= MAX_TABS_GLOBAL) {
|
|
2739
|
+
const recycled = await recycleOldestTab(session, req.reqId);
|
|
2740
|
+
if (!recycled) {
|
|
2741
|
+
return res.status(429).json({ error: 'Maximum tabs per session reached' });
|
|
2742
|
+
}
|
|
2192
2743
|
}
|
|
2193
2744
|
|
|
2194
2745
|
const group = getTabGroup(session, listItemId);
|
|
2195
2746
|
|
|
2196
2747
|
const page = await session.context.newPage();
|
|
2197
|
-
const tabId =
|
|
2748
|
+
const tabId = fly.makeTabId();
|
|
2198
2749
|
const tabState = createTabState(page);
|
|
2199
2750
|
attachDownloadListener(tabState, tabId, log);
|
|
2200
2751
|
group.set(tabId, tabState);
|
|
2752
|
+
refreshActiveTabsGauge();
|
|
2201
2753
|
|
|
2202
|
-
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
2754
|
+
await withPageLoadDuration('open_url', () => page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
|
|
2203
2755
|
tabState.visitedUrls.add(url);
|
|
2204
2756
|
|
|
2205
2757
|
log('info', 'openclaw tab opened', { reqId: req.reqId, tabId, url: page.url() });
|
|
@@ -2222,6 +2774,7 @@ app.post('/start', async (req, res) => {
|
|
|
2222
2774
|
await ensureBrowser();
|
|
2223
2775
|
res.json({ ok: true, profile: 'camoufox' });
|
|
2224
2776
|
} catch (err) {
|
|
2777
|
+
failuresTotal.labels('browser_launch', 'start').inc();
|
|
2225
2778
|
res.status(500).json({ ok: false, error: safeError(err) });
|
|
2226
2779
|
}
|
|
2227
2780
|
});
|
|
@@ -2242,7 +2795,21 @@ app.post('/stop', async (req, res) => {
|
|
|
2242
2795
|
cleanupTasks.push(clearSessionDownloads(session));
|
|
2243
2796
|
}
|
|
2244
2797
|
await Promise.all(cleanupTasks);
|
|
2798
|
+
for (const session of sessions.values()) {
|
|
2799
|
+
for (const [, group] of session.tabGroups) {
|
|
2800
|
+
for (const tabId of group.keys()) {
|
|
2801
|
+
const lock = tabLocks.get(tabId);
|
|
2802
|
+
if (lock) {
|
|
2803
|
+
lock.drain();
|
|
2804
|
+
tabLocks.delete(tabId);
|
|
2805
|
+
}
|
|
2806
|
+
}
|
|
2807
|
+
}
|
|
2808
|
+
}
|
|
2809
|
+
tabLocks.clear();
|
|
2245
2810
|
sessions.clear();
|
|
2811
|
+
refreshActiveTabsGauge();
|
|
2812
|
+
refreshTabLockQueueDepth();
|
|
2246
2813
|
res.json({ ok: true, stopped: true, profile: 'camoufox' });
|
|
2247
2814
|
} catch (err) {
|
|
2248
2815
|
res.status(500).json({ ok: false, error: safeError(err) });
|
|
@@ -2273,7 +2840,7 @@ app.post('/navigate', async (req, res) => {
|
|
|
2273
2840
|
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
2274
2841
|
|
|
2275
2842
|
const result = await withTabLock(targetId, async () => {
|
|
2276
|
-
await tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
2843
|
+
await withPageLoadDuration('navigate', () => tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }));
|
|
2277
2844
|
tabState.visitedUrls.add(url);
|
|
2278
2845
|
tabState.lastSnapshot = null;
|
|
2279
2846
|
|
|
@@ -2510,8 +3077,9 @@ app.post('/act', async (req, res) => {
|
|
|
2510
3077
|
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
2511
3078
|
await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
|
|
2512
3079
|
} else {
|
|
2513
|
-
const
|
|
2514
|
-
|
|
3080
|
+
const isVertical = direction === 'up' || direction === 'down';
|
|
3081
|
+
const delta = (direction === 'up' || direction === 'left') ? -amount : amount;
|
|
3082
|
+
await tabState.page.mouse.wheel(isVertical ? 0 : delta, isVertical ? delta : 0);
|
|
2515
3083
|
}
|
|
2516
3084
|
await tabState.page.waitForTimeout(300);
|
|
2517
3085
|
return { ok: true, targetId };
|
|
@@ -2611,6 +3179,7 @@ setInterval(async () => {
|
|
|
2611
3179
|
await testContext.close();
|
|
2612
3180
|
healthState.lastSuccessfulNav = Date.now();
|
|
2613
3181
|
} catch (err) {
|
|
3182
|
+
failuresTotal.labels('health_probe', 'internal').inc();
|
|
2614
3183
|
log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
|
|
2615
3184
|
if (testContext) await testContext.close().catch(() => {});
|
|
2616
3185
|
restartBrowser('health probe failed').catch(() => {});
|
|
@@ -2641,6 +3210,7 @@ async function gracefulShutdown(signal) {
|
|
|
2641
3210
|
forceTimeout.unref();
|
|
2642
3211
|
|
|
2643
3212
|
server.close();
|
|
3213
|
+
stopMemoryReporter();
|
|
2644
3214
|
|
|
2645
3215
|
for (const [userId, session] of sessions) {
|
|
2646
3216
|
await session.context.close().catch(() => {});
|
|
@@ -2652,17 +3222,32 @@ async function gracefulShutdown(signal) {
|
|
|
2652
3222
|
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
|
|
2653
3223
|
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
|
|
2654
3224
|
|
|
3225
|
+
// Idle self-shutdown REMOVED — it was racing with min_machines_running=2
|
|
3226
|
+
// and stopping machines that Fly couldn't auto-restart fast enough, leaving
|
|
3227
|
+
// only 1 machine to handle all browser traffic (causing timeouts for users).
|
|
3228
|
+
// Fly's auto_stop_machines=false + min_machines_running=2 handles scaling.
|
|
3229
|
+
|
|
2655
3230
|
const PORT = CONFIG.port;
|
|
2656
3231
|
const server = app.listen(PORT, async () => {
|
|
2657
|
-
|
|
3232
|
+
startMemoryReporter();
|
|
3233
|
+
refreshActiveTabsGauge();
|
|
3234
|
+
refreshTabLockQueueDepth();
|
|
3235
|
+
if (FLY_MACHINE_ID) {
|
|
3236
|
+
log('info', 'server started (fly)', { port: PORT, pid: process.pid, machineId: FLY_MACHINE_ID, nodeVersion: process.version });
|
|
3237
|
+
} else {
|
|
3238
|
+
log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });
|
|
3239
|
+
}
|
|
2658
3240
|
// Pre-warm browser so first request doesn't eat a 6-7s cold start
|
|
2659
3241
|
try {
|
|
2660
3242
|
const start = Date.now();
|
|
2661
3243
|
await ensureBrowser();
|
|
2662
3244
|
log('info', 'browser pre-warmed', { ms: Date.now() - start });
|
|
3245
|
+
scheduleBrowserIdleShutdown();
|
|
2663
3246
|
} catch (err) {
|
|
2664
|
-
log('error', 'browser pre-warm failed (will retry
|
|
3247
|
+
log('error', 'browser pre-warm failed (will retry in background)', { error: err.message });
|
|
3248
|
+
scheduleBrowserWarmRetry();
|
|
2665
3249
|
}
|
|
3250
|
+
// Idle self-shutdown removed — Fly manages machine lifecycle via fly.toml.
|
|
2666
3251
|
});
|
|
2667
3252
|
|
|
2668
3253
|
server.on('error', (err) => {
|