@askjo/camofox-browser 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/lib/config.js +11 -3
- package/lib/cookies.js +3 -3
- package/lib/downloads.js +240 -0
- package/lib/launcher.js +3 -3
- package/lib/macros.js +1 -1
- package/lib/snapshot.js +1 -1
- package/lib/youtube.js +286 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +9 -5
- package/plugin.ts +23 -0
- package/scripts/sync-version.js +25 -0
- package/server.js +874 -341
package/server.js
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
import { Camoufox, launchOptions } from 'camoufox-js';
|
|
2
|
+
import { firefox } from 'playwright-core';
|
|
3
|
+
import express from 'express';
|
|
4
|
+
import crypto from 'crypto';
|
|
5
|
+
import os from 'os';
|
|
6
|
+
import { expandMacro } from './lib/macros.js';
|
|
7
|
+
import { loadConfig } from './lib/config.js';
|
|
8
|
+
import { windowSnapshot } from './lib/snapshot.js';
|
|
9
|
+
import {
|
|
10
|
+
MAX_DOWNLOAD_INLINE_BYTES,
|
|
11
|
+
clearTabDownloads,
|
|
12
|
+
clearSessionDownloads,
|
|
13
|
+
attachDownloadListener,
|
|
14
|
+
getDownloadsList,
|
|
15
|
+
extractPageImages,
|
|
16
|
+
} from './lib/downloads.js';
|
|
17
|
+
import { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } from './lib/youtube.js';
|
|
9
18
|
|
|
10
19
|
const CONFIG = loadConfig();
|
|
11
20
|
|
|
@@ -71,6 +80,16 @@ function timingSafeCompare(a, b) {
|
|
|
71
80
|
return crypto.timingSafeEqual(bufA, bufB);
|
|
72
81
|
}
|
|
73
82
|
|
|
83
|
+
/**
 * Error raised for a stale or unknown element ref. Error responses map it to
 * HTTP 422 instead of 500.
 */
class StaleRefsError extends Error {
  /**
   * @param {*} ref - The ref the caller supplied; kept on the instance as `ref`.
   * @param {*} maxRef - Value interpolated after "e1-" in the valid-range hint.
   * @param {*} totalRefs - Value shown as the total ref count in the hint.
   */
  constructor(ref, maxRef, totalRefs) {
    super(`Unknown ref: ${ref} (valid refs: e1-${maxRef}, ${totalRefs} total). Refs reset after navigation - call snapshot first.`);
    // Machine-readable fields so callers can branch without parsing the message.
    Object.assign(this, {
      name: 'StaleRefsError',
      code: 'stale_refs',
      ref,
    });
  }
}
|
|
92
|
+
|
|
74
93
|
function safeError(err) {
|
|
75
94
|
if (CONFIG.nodeEnv === 'production') {
|
|
76
95
|
log('error', 'internal error', { error: err.message, stack: err.stack });
|
|
@@ -79,6 +98,17 @@ function safeError(err) {
|
|
|
79
98
|
return err.message;
|
|
80
99
|
}
|
|
81
100
|
|
|
101
|
+
/**
 * Write `err` to the response as JSON.
 *
 * Status is 422 for a StaleRefsError; otherwise `err.statusCode` when truthy,
 * else 500. The body is `{ error: safeError(err), ...extraFields }`, and for a
 * StaleRefsError also carries `code: 'stale_refs'` and the offending `ref`.
 *
 * @param {object} res - Response object exposing chainable `status()` / `json()`.
 * @param {Error} err - The error to report.
 * @param {object} [extraFields={}] - Extra properties merged into the body.
 */
function sendError(res, err, extraFields = {}) {
  const isStaleRef = err instanceof StaleRefsError;
  const httpStatus = isStaleRef ? 422 : (err.statusCode || 500);

  const payload = { error: safeError(err), ...extraFields };
  if (isStaleRef) {
    payload.code = 'stale_refs';
    payload.ref = err.ref;
  }

  res.status(httpStatus).json(payload);
}
|
|
111
|
+
|
|
82
112
|
function validateUrl(url) {
|
|
83
113
|
try {
|
|
84
114
|
const parsed = new URL(url);
|
|
@@ -91,26 +121,38 @@ function validateUrl(url) {
|
|
|
91
121
|
}
|
|
92
122
|
}
|
|
93
123
|
|
|
124
|
+
/**
 * Report whether `address` is one of the three loopback spellings accepted
 * here: IPv4 `127.0.0.1`, IPv6 `::1`, or IPv4-mapped `::ffff:127.0.0.1`.
 *
 * @param {string} address - Remote address to check.
 * @returns {boolean} True only for an exact match; false for empty/missing input.
 */
function isLoopbackAddress(address) {
  if (!address) return false;
  const loopbackForms = ['127.0.0.1', '::1', '::ffff:127.0.0.1'];
  return loopbackForms.includes(address);
}
|
|
128
|
+
|
|
94
129
|
// Import cookies into a user's browser context (Playwright cookies format)
|
|
95
130
|
// POST /sessions/:userId/cookies { cookies: Cookie[] }
|
|
96
131
|
//
|
|
97
132
|
// SECURITY:
|
|
98
133
|
// Cookie injection moves this from "anonymous browsing" to "authenticated browsing".
|
|
99
|
-
//
|
|
100
|
-
//
|
|
134
|
+
// By default, this endpoint is protected by CAMOFOX_API_KEY.
|
|
135
|
+
// For local development convenience, when CAMOFOX_API_KEY is NOT set, we allow
|
|
136
|
+
// unauthenticated cookie import ONLY from loopback (127.0.0.1 / ::1) and ONLY
|
|
137
|
+
// when NODE_ENV != production.
|
|
101
138
|
app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (req, res) => {
|
|
102
139
|
try {
|
|
103
|
-
if (
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
140
|
+
if (CONFIG.apiKey) {
|
|
141
|
+
const apiKey = CONFIG.apiKey;
|
|
142
|
+
const auth = String(req.headers['authorization'] || '');
|
|
143
|
+
const match = auth.match(/^Bearer\s+(.+)$/i);
|
|
144
|
+
if (!match || !timingSafeCompare(match[1], apiKey)) {
|
|
145
|
+
return res.status(403).json({ error: 'Forbidden' });
|
|
146
|
+
}
|
|
147
|
+
} else {
|
|
148
|
+
const remoteAddress = req.socket?.remoteAddress || '';
|
|
149
|
+
const allowUnauthedLocal = CONFIG.nodeEnv !== 'production' && isLoopbackAddress(remoteAddress);
|
|
150
|
+
if (!allowUnauthedLocal) {
|
|
151
|
+
return res.status(403).json({
|
|
152
|
+
error:
|
|
153
|
+
'Cookie import is disabled without CAMOFOX_API_KEY except for loopback requests in non-production environments.',
|
|
154
|
+
});
|
|
155
|
+
}
|
|
114
156
|
}
|
|
115
157
|
|
|
116
158
|
const userId = req.params.userId;
|
|
@@ -168,54 +210,86 @@ app.post('/sessions/:userId/cookies', express.json({ limit: '512kb' }), async (r
|
|
|
168
210
|
|
|
169
211
|
let browser = null;
|
|
170
212
|
// userId -> { context, tabGroups: Map<sessionKey, Map<tabId, TabState>>, lastAccess }
|
|
171
|
-
// TabState = { page, refs: Map<refId, {role, name, nth}>, visitedUrls: Set, toolCalls: number }
|
|
213
|
+
// TabState = { page, refs: Map<refId, {role, name, nth}>, visitedUrls: Set, downloads: Array, toolCalls: number }
|
|
172
214
|
// Note: sessionKey was previously called listItemId - both are accepted for backward compatibility
|
|
173
215
|
const sessions = new Map();
|
|
174
216
|
|
|
175
|
-
const SESSION_TIMEOUT_MS =
|
|
217
|
+
const SESSION_TIMEOUT_MS = CONFIG.sessionTimeoutMs;
|
|
176
218
|
const MAX_SNAPSHOT_NODES = 500;
|
|
177
|
-
const
|
|
178
|
-
const
|
|
179
|
-
const
|
|
180
|
-
const
|
|
181
|
-
const
|
|
219
|
+
const TAB_INACTIVITY_MS = CONFIG.tabInactivityMs;
|
|
220
|
+
const MAX_SESSIONS = CONFIG.maxSessions;
|
|
221
|
+
const MAX_TABS_PER_SESSION = CONFIG.maxTabsPerSession;
|
|
222
|
+
const MAX_TABS_GLOBAL = CONFIG.maxTabsGlobal;
|
|
223
|
+
const HANDLER_TIMEOUT_MS = CONFIG.handlerTimeoutMs;
|
|
224
|
+
const MAX_CONCURRENT_PER_USER = CONFIG.maxConcurrentPerUser;
|
|
182
225
|
const PAGE_CLOSE_TIMEOUT_MS = 5000;
|
|
183
|
-
const NAVIGATE_TIMEOUT_MS =
|
|
184
|
-
const BUILDREFS_TIMEOUT_MS =
|
|
226
|
+
const NAVIGATE_TIMEOUT_MS = CONFIG.navigateTimeoutMs;
|
|
227
|
+
const BUILDREFS_TIMEOUT_MS = CONFIG.buildrefsTimeoutMs;
|
|
185
228
|
const FAILURE_THRESHOLD = 3;
|
|
186
|
-
const
|
|
229
|
+
const MAX_CONSECUTIVE_TIMEOUTS = 3;
|
|
230
|
+
const TAB_LOCK_TIMEOUT_MS = 35000; // Must be > HANDLER_TIMEOUT_MS so active op times out first
|
|
231
|
+
|
|
232
|
+
/**
 * FIFO mutex used to serialize operations on one tab.
 *
 * It replaces an earlier Promise-chain lock which, on timeout, proceeded
 * WITHOUT the lock and so allowed concurrent Playwright operations that
 * corrupt CDP state.
 */
class TabLock {
  constructor() {
    this.queue = [];     // waiters not yet granted the lock, oldest first
    this.active = false; // true while the lock is held (or after drain())
  }

  /**
   * Wait for the lock.
   *
   * @param {number} timeoutMs - Maximum time to wait in the queue.
   * @returns {Promise<void>} Resolves once the lock is granted. Rejects with
   *   `Error('Tab lock queue timeout')` if the wait expires first, or with
   *   `Error('Tab destroyed')` if drain() runs while still queued.
   */
  acquire(timeoutMs) {
    return new Promise((resolve, reject) => {
      const waiter = { resolve, reject, timer: null };

      const onQueueTimeout = () => {
        // Leave the queue so a later release() cannot grant a dead waiter.
        const position = this.queue.indexOf(waiter);
        if (position !== -1) this.queue.splice(position, 1);
        reject(new Error('Tab lock queue timeout'));
      };

      waiter.timer = setTimeout(onQueueTimeout, timeoutMs);
      this.queue.push(waiter);
      this._tryNext();
    });
  }

  /** Give up the lock and hand it to the oldest waiter, if there is one. */
  release() {
    this.active = false;
    this._tryNext();
  }

  /** Grant the lock to the head of the queue when it is free. */
  _tryNext() {
    const canGrant = !this.active && this.queue.length > 0;
    if (!canGrant) return;

    this.active = true;
    const next = this.queue.shift();
    clearTimeout(next.timer);
    next.resolve();
  }

  /**
   * Reject every queued waiter with `Error('Tab destroyed')` and empty the
   * queue. `active` is left true, so nothing is granted until release() runs.
   */
  drain() {
    this.active = true;
    const abandoned = this.queue;
    this.queue = [];
    for (const waiter of abandoned) {
      clearTimeout(waiter.timer);
      waiter.reject(new Error('Tab destroyed'));
    }
  }
}
|
|
275
|
+
|
|
276
|
+
// Per-tab locks to serialize operations on the same tab
|
|
277
|
+
const tabLocks = new Map(); // tabId -> TabLock
|
|
278
|
+
|
|
279
|
+
/**
 * Return the lock registered for `tabId` in `tabLocks`, creating and
 * registering a new TabLock the first time the tab is seen.
 *
 * @param {string} tabId - Tab identifier used as the registry key.
 * @returns {TabLock} The lock for that tab.
 */
function getTabLock(tabId) {
  let lock = tabLocks.get(tabId);
  if (lock === undefined) {
    lock = new TabLock();
    tabLocks.set(tabId, lock);
  }
  return lock;
}
|
|
283
|
+
|
|
284
|
+
/**
 * Run `operation` while holding the lock for `tabId`.
 *
 * The operation timeout is applied only after the lock has been acquired, so
 * each operation gets its full budget regardless of how long it waited in the
 * queue. The lock is released whether the operation succeeds, throws, or the
 * timeout wrapper rejects.
 *
 * @param {string} tabId - Tab whose lock serializes the operation.
 * @param {Function} operation - Called with no arguments once the lock is held.
 * @param {number} [timeoutMs=HANDLER_TIMEOUT_MS] - Budget passed to withTimeout.
 * @returns {Promise<*>} Whatever the wrapped operation resolves to.
 */
async function withTabLock(tabId, operation, timeoutMs = HANDLER_TIMEOUT_MS) {
  const tabLock = getTabLock(tabId);
  await tabLock.acquire(TAB_LOCK_TIMEOUT_MS);
  try {
    const outcome = await withTimeout(operation(), timeoutMs, 'action');
    return outcome;
  } finally {
    tabLock.release();
  }
}
|
|
221
295
|
|
|
@@ -297,7 +371,7 @@ function buildProxyConfig() {
|
|
|
297
371
|
};
|
|
298
372
|
}
|
|
299
373
|
|
|
300
|
-
const BROWSER_IDLE_TIMEOUT_MS =
|
|
374
|
+
const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
|
|
301
375
|
let browserIdleTimer = null;
|
|
302
376
|
let browserLaunchPromise = null;
|
|
303
377
|
|
|
@@ -424,6 +498,20 @@ function normalizeUserId(userId) {
|
|
|
424
498
|
async function getSession(userId) {
|
|
425
499
|
const key = normalizeUserId(userId);
|
|
426
500
|
let session = sessions.get(key);
|
|
501
|
+
|
|
502
|
+
// Check if existing session's context is still alive
|
|
503
|
+
if (session) {
|
|
504
|
+
try {
|
|
505
|
+
// Lightweight probe: pages() is synchronous-ish and throws if context is dead
|
|
506
|
+
session.context.pages();
|
|
507
|
+
} catch (err) {
|
|
508
|
+
log('warn', 'session context dead, recreating', { userId: key, error: err.message });
|
|
509
|
+
session.context.close().catch(() => {});
|
|
510
|
+
sessions.delete(key);
|
|
511
|
+
session = null;
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
|
|
427
515
|
if (!session) {
|
|
428
516
|
if (sessions.size >= MAX_SESSIONS) {
|
|
429
517
|
throw new Error('Maximum concurrent sessions reached');
|
|
@@ -459,6 +547,94 @@ function getTabGroup(session, listItemId) {
|
|
|
459
547
|
return group;
|
|
460
548
|
}
|
|
461
549
|
|
|
550
|
+
/**
 * Report whether an error message indicates that the page, context, or
 * browser behind an operation has already been closed.
 *
 * @param {*} err - Error-like value; a missing or empty message yields false.
 * @returns {boolean} True when the message contains any known "closed" marker.
 */
function isDeadContextError(err) {
  const text = err && err.message || '';
  const closedMarkers = [
    'Target page, context or browser has been closed',
    'browser has been closed',
    'Context closed',
    'Browser closed',
  ];
  return closedMarkers.some((marker) => text.includes(marker));
}
|
|
557
|
+
|
|
558
|
+
/**
 * Report whether an error message looks like a timeout: it either contains
 * "timed out after", or contains both "Timeout" and "exceeded".
 *
 * @param {*} err - Error-like value; a missing or empty message yields false.
 * @returns {boolean} True when the message matches either timeout wording.
 */
function isTimeoutError(err) {
  const text = err && err.message || '';
  if (text.includes('timed out after')) return true;
  return text.includes('Timeout') && text.includes('exceeded');
}
|
|
563
|
+
|
|
564
|
+
/**
 * Report whether `err` is the rejection produced when a request waits too
 * long in a tab's lock queue (message exactly "Tab lock queue timeout").
 *
 * Always returns a strict boolean, including for null/undefined input.
 *
 * @param {*} err - Error-like value to inspect.
 * @returns {boolean} True only when the message matches exactly.
 */
function isTabLockQueueTimeout(err) {
  return err?.message === 'Tab lock queue timeout';
}
|
|
567
|
+
|
|
568
|
+
/**
 * Report whether `err` is the rejection delivered to requests that were
 * still queued on a tab's lock when it was drained (message exactly
 * "Tab destroyed").
 *
 * Always returns a strict boolean, including for null/undefined input.
 *
 * @param {*} err - Error-like value to inspect.
 * @returns {boolean} True only when the message matches exactly.
 */
function isTabDestroyedError(err) {
  return err?.message === 'Tab destroyed';
}
|
|
571
|
+
|
|
572
|
+
/**
 * Shared error handler for route catch blocks.
 *
 * In order, it:
 *  1. destroys the user's session when the error shows a closed context;
 *  2. counts consecutive timeouts on the tab and destroys the tab once the
 *     count reaches MAX_CONSECUTIVE_TIMEOUTS;
 *  3. on a lock-queue timeout, destroys the tab and answers 503;
 *  4. answers 410 when the request was rejected because its tab was destroyed;
 *  5. otherwise delegates to sendError.
 *
 * Steps 1-3 only run when a userId is present in the request body or query.
 *
 * @param {Error} err - The error caught by the route.
 * @param {object} req - Request; userId is read from body/query, tabId from
 *   body/query/params.
 * @param {object} res - Response used to send the reply.
 * @param {object} [extraFields={}] - Extra properties merged into the JSON body.
 */
function handleRouteError(err, req, res, extraFields = {}) {
  const userId = req.body?.userId || req.query?.userId;
  const requestTabId = () => req.body?.tabId || req.query?.tabId || req.params?.tabId;
  // Looked up on demand so each step sees the registry as the previous step left it.
  const currentSession = () => sessions.get(normalizeUserId(userId));

  if (userId && isDeadContextError(err)) {
    destroySession(userId);
  }

  // Track consecutive timeouts per tab and auto-destroy stuck tabs.
  if (userId && isTimeoutError(err)) {
    const tabId = requestTabId();
    const session = currentSession();
    const found = session && tabId ? findTab(session, tabId) : null;
    if (found) {
      const { tabState } = found;
      tabState.consecutiveTimeouts++;
      if (tabState.consecutiveTimeouts >= MAX_CONSECUTIVE_TIMEOUTS) {
        log('warn', 'auto-destroying tab after consecutive timeouts', { tabId, count: tabState.consecutiveTimeouts });
        destroyTab(session, tabId);
      }
    }
  }

  // A lock-queue timeout means the tab is stuck: destroy it immediately.
  if (userId && isTabLockQueueTimeout(err)) {
    const tabId = requestTabId();
    const session = currentSession();
    if (session && tabId) {
      destroyTab(session, tabId);
    }
    return res.status(503).json({ error: 'Tab unresponsive and has been destroyed. Open a new tab.', ...extraFields });
  }

  // The tab was destroyed while this request was still queued on its lock.
  if (isTabDestroyedError(err)) {
    return res.status(410).json({ error: 'Tab was destroyed. Open a new tab.', ...extraFields });
  }

  sendError(res, err, extraFields);
}
|
|
609
|
+
|
|
610
|
+
/**
 * Tear down one tab: reject everything queued on its lock, drop the lock
 * from the registry, close the page, and remove the tab from its group
 * (removing the group too if it becomes empty).
 *
 * The lock is drained even when the tab is not found in any group.
 *
 * @param {object} session - Session whose `tabGroups` map is searched.
 * @param {string} tabId - Tab to destroy.
 * @returns {boolean} True if the tab was found and removed, false otherwise.
 */
function destroyTab(session, tabId) {
  const lock = tabLocks.get(tabId);
  if (lock) {
    lock.drain();
    tabLocks.delete(tabId);
  }

  for (const [listItemId, group] of session.tabGroups) {
    if (!group.has(tabId)) continue;

    const { page, toolCalls } = group.get(tabId);
    log('warn', 'destroying stuck tab', { tabId, listItemId, toolCalls });
    safePageClose(page);
    group.delete(tabId);
    if (group.size === 0) session.tabGroups.delete(listItemId);
    return true;
  }

  return false;
}
|
|
628
|
+
|
|
629
|
+
/**
 * Discard a user's session after its browser context has died.
 *
 * Every tab lock belonging to the session is drained and removed from
 * `tabLocks` before the context is closed. Without this, requests queued on
 * those tabs would sit until the lock-queue timeout and the lock entries
 * would never be removed from the registry.
 *
 * @param {string} userId - User whose session should be removed. Unknown
 *   users are ignored.
 */
function destroySession(userId) {
  const key = normalizeUserId(userId);
  const session = sessions.get(key);
  if (!session) return;

  log('warn', 'destroying dead session', { userId: key });

  // Fail queued waiters right away and release the per-tab lock entries.
  for (const group of session.tabGroups?.values() ?? []) {
    for (const tabId of group.keys()) {
      const lock = tabLocks.get(tabId);
      if (lock) {
        lock.drain();
        tabLocks.delete(tabId);
      }
    }
  }

  // Best-effort close: the context is already believed dead, so a failure
  // here carries no new information.
  session.context.close().catch(() => {});
  sessions.delete(key);
}
|
|
637
|
+
|
|
462
638
|
function findTab(session, tabId) {
|
|
463
639
|
for (const [listItemId, group] of session.tabGroups) {
|
|
464
640
|
if (group.has(tabId)) {
|
|
@@ -474,11 +650,15 @@ function createTabState(page) {
|
|
|
474
650
|
page,
|
|
475
651
|
refs: new Map(),
|
|
476
652
|
visitedUrls: new Set(),
|
|
653
|
+
downloads: [],
|
|
477
654
|
toolCalls: 0,
|
|
655
|
+
consecutiveTimeouts: 0,
|
|
478
656
|
lastSnapshot: null,
|
|
479
657
|
};
|
|
480
658
|
}
|
|
481
659
|
|
|
660
|
+
|
|
661
|
+
|
|
482
662
|
async function waitForPageReady(page, options = {}) {
|
|
483
663
|
const { timeout = 10000, waitForNetwork = true } = options;
|
|
484
664
|
|
|
@@ -568,6 +748,156 @@ async function dismissConsentDialogs(page) {
|
|
|
568
748
|
}
|
|
569
749
|
}
|
|
570
750
|
|
|
751
|
+
// --- Google SERP detection ---
|
|
752
|
+
/**
 * Report whether `url` is a Google search results page.
 *
 * The host must be `google.<tld>`, `google.co.<cc>` or `google.com.<cc>`,
 * optionally behind subdomains, and the path must be exactly `/search`.
 * Hosts that merely contain the text "google." (for example
 * `notgoogle.com` or `google.evil.com`) are rejected, so the Google-specific
 * extraction path is not applied to unrelated sites.
 *
 * @param {string} url - Absolute URL to test.
 * @returns {boolean} True for a Google `/search` URL; false otherwise,
 *   including when `url` cannot be parsed.
 */
function isGoogleSerp(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  // URL lower-cases the hostname; a trailing root dot is tolerated.
  const googleHost = /(?:^|\.)google\.(?:com?\.)?[a-z]{2,3}\.?$/;
  return parsed.pathname === '/search' && googleHost.test(parsed.hostname);
}
|
|
760
|
+
|
|
761
|
+
// --- Google SERP: combined extraction (refs + snapshot in one DOM pass) ---
/**
 * Build the ref table and a text snapshot for a Google results page in a
 * single in-page DOM pass.
 *
 * The snapshot lists, in order: the page title as a heading, the search box,
 * navigation links, each result block (title link with URL, cite and
 * snippet, or a generic group of outbound links), "People also ask"
 * questions, and the next-page link. Every actionable entry gets a
 * sequential ref id ("e1", "e2", ...).
 *
 * @param {object} page - Playwright-style page. A missing or closed page
 *   yields an empty result.
 * @returns {Promise<{refs: Map<string, {role: string, name: string, nth: number}>, snapshot: string}>}
 *   `nth` is the zero-based occurrence index among refs that share the same
 *   role and name.
 */
async function extractGoogleSerp(page) {
  const refs = new Map();
  if (!page || page.isClosed()) return { refs, snapshot: '' };

  const start = Date.now();

  // Best-effort probe: a failed evaluate is treated as "not rendered yet".
  const alreadyRendered = await page.evaluate(() => !!document.querySelector('#rso h3, #search h3, #rso [data-snhf]')).catch(() => false);
  if (!alreadyRendered) {
    try {
      await page.waitForSelector('#rso h3, #search h3, #rso [data-snhf]', { timeout: 5000 });
    } catch {
      try {
        await page.waitForSelector('#rso a[href]:not([href^="/search"]), #search a[href]:not([href^="/search"])', { timeout: 2000 });
      } catch {
        // Neither selector appeared in time; extract whatever is on the page.
      }
    }
  }

  // Everything inside this callback runs in the page, so it must not
  // reference variables from the enclosing Node scope.
  const extracted = await page.evaluate(() => {
    const snapshot = [];
    const elements = [];
    let refCounter = 1;

    function addRef(role, name) {
      const id = 'e' + refCounter++;
      elements.push({ id, role, name });
      return id;
    }

    // Names are emitted inside double quotes, so embedded quotes are escaped.
    const escapeQuotes = (value) => value.replace(/"/g, '\\"');

    snapshot.push('- heading "' + escapeQuotes(document.title) + '"');

    const searchInput = document.querySelector('input[name="q"], textarea[name="q"]');
    if (searchInput) {
      const name = 'Search';
      const refId = addRef('searchbox', name);
      snapshot.push('- searchbox "' + name + '" [' + refId + ']: ' + (searchInput.value || ''));
    }

    const navContainer = document.querySelector('div[role="navigation"], div[role="list"]');
    if (navContainer) {
      const navLinks = navContainer.querySelectorAll('a');
      if (navLinks.length > 0) {
        snapshot.push('- navigation:');
        navLinks.forEach(a => {
          const text = (a.textContent || '').trim();
          if (!text) return;
          // Skip links whose text is only a number below 50.
          if (/^\d+$/.test(text) && Number.parseInt(text, 10) < 50) return;
          const refId = addRef('link', text);
          // Escape quotes here as every other quoted entry does; the ref
          // keeps the name exactly as it was registered before.
          snapshot.push(' - link "' + escapeQuotes(text) + '" [' + refId + ']');
        });
      }
    }

    const resultContainer = document.querySelector('#rso') || document.querySelector('#search');
    if (resultContainer) {
      const resultBlocks = resultContainer.querySelectorAll(':scope > div');
      for (const block of resultBlocks) {
        const h3 = block.querySelector('h3');
        const mainLink = h3 ? h3.closest('a') : null;

        if (h3 && mainLink) {
          const rawTitle = h3.textContent.trim();
          const title = escapeQuotes(rawTitle);
          const href = mainLink.href;
          const cite = block.querySelector('cite');
          const displayUrl = cite ? cite.textContent.trim() : '';

          let snippet = '';
          for (const sel of ['[data-sncf]', '[data-content-feature="1"]', '.VwiC3b', 'div[style*="-webkit-line-clamp"]', 'span.aCOpRe']) {
            const el = block.querySelector(sel);
            if (el) { snippet = el.textContent.trim().slice(0, 300); break; }
          }
          if (!snippet) {
            // Fallback: take the block text that follows the title and cite.
            // The offset is measured with the unescaped title, because that
            // is the text that actually appears in the block.
            const allText = block.textContent.trim().replace(/\s+/g, ' ');
            const titleLen = rawTitle.length + (displayUrl ? displayUrl.length : 0);
            if (allText.length > titleLen + 20) {
              snippet = allText.slice(titleLen).trim().slice(0, 300);
            }
          }

          const refId = addRef('link', title);
          snapshot.push('- link "' + title + '" [' + refId + ']:');
          snapshot.push(' - /url: ' + href);
          if (displayUrl) snapshot.push(' - cite: ' + displayUrl);
          if (snippet) snapshot.push(' - text: ' + snippet);
        } else {
          const blockLinks = block.querySelectorAll('a[href^="http"]:not([href*="google.com/search"])');
          if (blockLinks.length > 0) {
            const blockText = block.textContent.trim().replace(/\s+/g, ' ').slice(0, 200);
            if (blockText.length > 10) {
              snapshot.push('- group:');
              snapshot.push(' - text: ' + blockText);
              blockLinks.forEach(a => {
                const linkText = escapeQuotes((a.textContent || '').trim()).slice(0, 100);
                if (linkText.length > 2) {
                  const refId = addRef('link', linkText);
                  snapshot.push(' - link "' + linkText + '" [' + refId + ']:');
                  snapshot.push(' - /url: ' + a.href);
                }
              });
            }
          }
        }
      }
    }

    const paaItems = document.querySelectorAll('[jsname="Cpkphb"], div.related-question-pair');
    if (paaItems.length > 0) {
      snapshot.push('- heading "People also ask"');
      paaItems.forEach(q => {
        const text = escapeQuotes((q.textContent || '').trim()).slice(0, 150);
        if (text) {
          const refId = addRef('button', text);
          snapshot.push(' - button "' + text + '" [' + refId + ']');
        }
      });
    }

    const nextLink = document.querySelector('#botstuff a[aria-label="Next page"], td.d6cvqb a, a#pnnext');
    if (nextLink) {
      const refId = addRef('link', 'Next');
      snapshot.push('- navigation "pagination":');
      snapshot.push(' - link "Next" [' + refId + ']');
    }

    return { snapshot: snapshot.join('\n'), elements };
  });

  // Number duplicates so refs sharing a role and name stay distinguishable.
  const seenCounts = new Map();
  for (const el of extracted.elements) {
    const key = `${el.role}:${el.name}`;
    const nth = seenCounts.get(key) || 0;
    seenCounts.set(key, nth + 1);
    refs.set(el.id, { role: el.role, name: el.name, nth });
  }

  log('info', 'extractGoogleSerp', { elapsed: Date.now() - start, refs: refs.size });
  return { refs, snapshot: extracted.snapshot };
}
|
|
900
|
+
|
|
571
901
|
async function buildRefs(page) {
|
|
572
902
|
const refs = new Map();
|
|
573
903
|
|
|
@@ -576,6 +906,13 @@ async function buildRefs(page) {
|
|
|
576
906
|
return refs;
|
|
577
907
|
}
|
|
578
908
|
|
|
909
|
+
// Google SERP fast path — skip ariaSnapshot entirely
|
|
910
|
+
const url = page.url();
|
|
911
|
+
if (isGoogleSerp(url)) {
|
|
912
|
+
const { refs: googleRefs } = await extractGoogleSerp(page);
|
|
913
|
+
return googleRefs;
|
|
914
|
+
}
|
|
915
|
+
|
|
579
916
|
const start = Date.now();
|
|
580
917
|
|
|
581
918
|
// Hard total timeout on the entire buildRefs operation
|
|
@@ -690,35 +1027,11 @@ function refToLocator(page, ref, refs) {
|
|
|
690
1027
|
return locator;
|
|
691
1028
|
}
|
|
692
1029
|
|
|
693
|
-
// --- YouTube transcript
|
|
694
|
-
//
|
|
695
|
-
//
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
const { execFile } = require('child_process');
|
|
700
|
-
const { mkdtemp, readFile, readdir, rm } = require('fs/promises');
|
|
701
|
-
const { tmpdir } = require('os');
|
|
702
|
-
const { join } = require('path');
|
|
703
|
-
|
|
704
|
-
// Detect yt-dlp binary at startup
|
|
705
|
-
let ytDlpPath = null;
|
|
706
|
-
(async () => {
|
|
707
|
-
for (const candidate of ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp']) {
|
|
708
|
-
try {
|
|
709
|
-
await new Promise((resolve, reject) => {
|
|
710
|
-
execFile(candidate, ['--version'], { timeout: 5000 }, (err, stdout) => {
|
|
711
|
-
if (err) return reject(err);
|
|
712
|
-
resolve(stdout.trim());
|
|
713
|
-
});
|
|
714
|
-
});
|
|
715
|
-
ytDlpPath = candidate;
|
|
716
|
-
log('info', 'yt-dlp found', { path: candidate });
|
|
717
|
-
break;
|
|
718
|
-
} catch {}
|
|
719
|
-
}
|
|
720
|
-
if (!ytDlpPath) log('warn', 'yt-dlp not found — YouTube transcript endpoint will use browser fallback');
|
|
721
|
-
})();
|
|
1030
|
+
// --- YouTube transcript ---
|
|
1031
|
+
// Implementation extracted to lib/youtube.js to avoid scanner false positives
|
|
1032
|
+
// (child_process + app.post in same file triggers OpenClaw skill-scanner)
|
|
1033
|
+
|
|
1034
|
+
detectYtDlp(log);
|
|
722
1035
|
|
|
723
1036
|
app.post('/youtube/transcript', async (req, res) => {
|
|
724
1037
|
const reqId = req.reqId;
|
|
@@ -738,11 +1051,16 @@ app.post('/youtube/transcript', async (req, res) => {
|
|
|
738
1051
|
const videoId = videoIdMatch[1];
|
|
739
1052
|
const lang = languages[0] || 'en';
|
|
740
1053
|
|
|
741
|
-
log('info', 'youtube transcript: starting', { reqId, videoId, lang, method:
|
|
1054
|
+
log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: hasYtDlp() ? 'yt-dlp' : 'browser' });
|
|
742
1055
|
|
|
743
1056
|
let result;
|
|
744
|
-
if (
|
|
745
|
-
|
|
1057
|
+
if (hasYtDlp()) {
|
|
1058
|
+
try {
|
|
1059
|
+
result = await ytDlpTranscript(reqId, url, videoId, lang);
|
|
1060
|
+
} catch (ytErr) {
|
|
1061
|
+
log('warn', 'yt-dlp failed, falling back to browser', { reqId, error: ytErr.message });
|
|
1062
|
+
result = await browserTranscript(reqId, url, videoId, lang);
|
|
1063
|
+
}
|
|
746
1064
|
} else {
|
|
747
1065
|
result = await browserTranscript(reqId, url, videoId, lang);
|
|
748
1066
|
}
|
|
@@ -755,80 +1073,7 @@ app.post('/youtube/transcript', async (req, res) => {
|
|
|
755
1073
|
}
|
|
756
1074
|
});
|
|
757
1075
|
|
|
758
|
-
//
|
|
759
|
-
async function ytDlpTranscript(reqId, url, videoId, lang) {
|
|
760
|
-
const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
|
|
761
|
-
try {
|
|
762
|
-
// Step 1: Get title via --print (fast, no download)
|
|
763
|
-
const title = await new Promise((resolve, reject) => {
|
|
764
|
-
execFile(ytDlpPath, [
|
|
765
|
-
'--skip-download', '--no-warnings', '--print', '%(title)s', url,
|
|
766
|
-
], { timeout: 15000 }, (err, stdout) => {
|
|
767
|
-
if (err) return reject(new Error(`yt-dlp metadata failed: ${err.message}`));
|
|
768
|
-
resolve(stdout.trim().split('\n')[0] || '');
|
|
769
|
-
});
|
|
770
|
-
});
|
|
771
|
-
|
|
772
|
-
// Step 2: Download subtitles to temp dir
|
|
773
|
-
await new Promise((resolve, reject) => {
|
|
774
|
-
execFile(ytDlpPath, [
|
|
775
|
-
'--skip-download',
|
|
776
|
-
'--write-sub', '--write-auto-sub',
|
|
777
|
-
'--sub-lang', lang,
|
|
778
|
-
'--sub-format', 'json3',
|
|
779
|
-
'-o', join(tmpDir, '%(id)s'),
|
|
780
|
-
url,
|
|
781
|
-
], { timeout: 30000 }, (err, stdout, stderr) => {
|
|
782
|
-
if (err) return reject(new Error(`yt-dlp subtitle download failed: ${err.message}\n${stderr}`));
|
|
783
|
-
resolve();
|
|
784
|
-
});
|
|
785
|
-
});
|
|
786
|
-
|
|
787
|
-
// Find the subtitle file
|
|
788
|
-
const files = await readdir(tmpDir);
|
|
789
|
-
const subFile = files.find(f => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
|
|
790
|
-
if (!subFile) {
|
|
791
|
-
return {
|
|
792
|
-
status: 'error', code: 404,
|
|
793
|
-
message: 'No captions available for this video',
|
|
794
|
-
video_url: url, video_id: videoId, title,
|
|
795
|
-
};
|
|
796
|
-
}
|
|
797
|
-
|
|
798
|
-
const content = await readFile(join(tmpDir, subFile), 'utf8');
|
|
799
|
-
let transcriptText = null;
|
|
800
|
-
|
|
801
|
-
if (subFile.endsWith('.json3')) {
|
|
802
|
-
transcriptText = parseJson3(content);
|
|
803
|
-
} else if (subFile.endsWith('.vtt')) {
|
|
804
|
-
transcriptText = parseVtt(content);
|
|
805
|
-
} else {
|
|
806
|
-
transcriptText = parseXml(content);
|
|
807
|
-
}
|
|
808
|
-
|
|
809
|
-
if (!transcriptText || !transcriptText.trim()) {
|
|
810
|
-
return {
|
|
811
|
-
status: 'error', code: 404,
|
|
812
|
-
message: 'Subtitle file found but content was empty',
|
|
813
|
-
video_url: url, video_id: videoId, title,
|
|
814
|
-
};
|
|
815
|
-
}
|
|
816
|
-
|
|
817
|
-
// Detect language from filename (e.g., dQw4w9WgXcQ.en.json3)
|
|
818
|
-
const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);
|
|
819
|
-
|
|
820
|
-
return {
|
|
821
|
-
status: 'ok', transcript: transcriptText,
|
|
822
|
-
video_url: url, video_id: videoId, video_title: title,
|
|
823
|
-
language: langMatch?.[1] || lang,
|
|
824
|
-
total_words: transcriptText.split(/\s+/).length,
|
|
825
|
-
};
|
|
826
|
-
} finally {
|
|
827
|
-
await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
|
|
828
|
-
}
|
|
829
|
-
}
|
|
830
|
-
|
|
831
|
-
// Strategy 2: Browser fallback — play video, intercept timedtext network response
|
|
1076
|
+
// Browser fallback — play video, intercept timedtext network response
|
|
832
1077
|
async function browserTranscript(reqId, url, videoId, lang) {
|
|
833
1078
|
return await withUserLimit('__yt_transcript__', async () => {
|
|
834
1079
|
await ensureBrowser();
|
|
@@ -836,13 +1081,11 @@ async function browserTranscript(reqId, url, videoId, lang) {
|
|
|
836
1081
|
const page = await session.context.newPage();
|
|
837
1082
|
|
|
838
1083
|
try {
|
|
839
|
-
// Mute audio
|
|
840
1084
|
await page.addInitScript(() => {
|
|
841
1085
|
const origPlay = HTMLMediaElement.prototype.play;
|
|
842
1086
|
HTMLMediaElement.prototype.play = function() { this.volume = 0; this.muted = true; return origPlay.call(this); };
|
|
843
1087
|
});
|
|
844
1088
|
|
|
845
|
-
// Intercept timedtext responses — filter by video ID to skip ad captions
|
|
846
1089
|
let interceptedCaptions = null;
|
|
847
1090
|
page.on('response', async (response) => {
|
|
848
1091
|
const respUrl = response.url();
|
|
@@ -857,24 +1100,57 @@ async function browserTranscript(reqId, url, videoId, lang) {
|
|
|
857
1100
|
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATE_TIMEOUT_MS });
|
|
858
1101
|
await page.waitForTimeout(2000);
|
|
859
1102
|
|
|
860
|
-
// Extract metadata from ytInitialPlayerResponse
|
|
1103
|
+
// Extract caption track URLs and metadata from ytInitialPlayerResponse
|
|
861
1104
|
const meta = await page.evaluate(() => {
|
|
862
1105
|
const r = window.ytInitialPlayerResponse || (typeof ytInitialPlayerResponse !== 'undefined' ? ytInitialPlayerResponse : null);
|
|
863
|
-
if (!r) return { title: '' };
|
|
1106
|
+
if (!r) return { title: '', tracks: [] };
|
|
864
1107
|
const tracks = r?.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
|
|
865
1108
|
return {
|
|
866
1109
|
title: r?.videoDetails?.title || '',
|
|
867
|
-
|
|
1110
|
+
tracks: tracks.map(t => ({ code: t.languageCode, name: t.name?.simpleText || t.languageCode, kind: t.kind || 'manual', url: t.baseUrl })),
|
|
868
1111
|
};
|
|
869
1112
|
});
|
|
870
1113
|
|
|
871
|
-
|
|
1114
|
+
log('info', 'youtube transcript: extracted caption tracks', { reqId, title: meta.title, trackCount: meta.tracks.length, tracks: meta.tracks.map(t => t.code) });
|
|
1115
|
+
|
|
1116
|
+
// Strategy A: Fetch caption track URL directly from ytInitialPlayerResponse
|
|
1117
|
+
// These URLs are freshly signed by YouTube and work immediately
|
|
1118
|
+
if (meta.tracks && meta.tracks.length > 0) {
|
|
1119
|
+
const track = meta.tracks.find(t => t.code === lang) || meta.tracks[0];
|
|
1120
|
+
if (track && track.url) {
|
|
1121
|
+
const captionUrl = track.url + (track.url.includes('?') ? '&' : '?') + 'fmt=json3';
|
|
1122
|
+
log('info', 'youtube transcript: fetching caption track', { reqId, lang: track.code, url: captionUrl.substring(0, 100) });
|
|
1123
|
+
try {
|
|
1124
|
+
const captionResp = await page.evaluate(async (fetchUrl) => {
|
|
1125
|
+
const resp = await fetch(fetchUrl);
|
|
1126
|
+
return resp.ok ? await resp.text() : null;
|
|
1127
|
+
}, captionUrl);
|
|
1128
|
+
if (captionResp && captionResp.length > 0) {
|
|
1129
|
+
let transcriptText = null;
|
|
1130
|
+
if (captionResp.trimStart().startsWith('{')) transcriptText = parseJson3(captionResp);
|
|
1131
|
+
else if (captionResp.includes('WEBVTT')) transcriptText = parseVtt(captionResp);
|
|
1132
|
+
else if (captionResp.includes('<text')) transcriptText = parseXml(captionResp);
|
|
1133
|
+
if (transcriptText && transcriptText.trim()) {
|
|
1134
|
+
return {
|
|
1135
|
+
status: 'ok', transcript: transcriptText,
|
|
1136
|
+
video_url: url, video_id: videoId, video_title: meta.title,
|
|
1137
|
+
language: track.code, total_words: transcriptText.split(/\s+/).length,
|
|
1138
|
+
available_languages: meta.tracks.map(t => ({ code: t.code, name: t.name, kind: t.kind })),
|
|
1139
|
+
};
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
} catch (fetchErr) {
|
|
1143
|
+
log('warn', 'youtube transcript: caption track fetch failed', { reqId, error: fetchErr.message });
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
// Strategy B: Play video and intercept timedtext network response
|
|
872
1149
|
await page.evaluate(() => {
|
|
873
1150
|
const v = document.querySelector('video');
|
|
874
1151
|
if (v) { v.muted = true; v.play().catch(() => {}); }
|
|
875
1152
|
}).catch(() => {});
|
|
876
1153
|
|
|
877
|
-
// Wait up to 20s for the target video's captions (may need to sit through an ad)
|
|
878
1154
|
for (let i = 0; i < 40 && !interceptedCaptions; i++) {
|
|
879
1155
|
await page.waitForTimeout(500);
|
|
880
1156
|
}
|
|
@@ -882,7 +1158,7 @@ async function browserTranscript(reqId, url, videoId, lang) {
|
|
|
882
1158
|
if (!interceptedCaptions) {
|
|
883
1159
|
return {
|
|
884
1160
|
status: 'error', code: 404,
|
|
885
|
-
message: 'No captions
|
|
1161
|
+
message: 'No captions available for this video',
|
|
886
1162
|
video_url: url, video_id: videoId, title: meta.title,
|
|
887
1163
|
};
|
|
888
1164
|
}
|
|
@@ -914,78 +1190,6 @@ async function browserTranscript(reqId, url, videoId, lang) {
|
|
|
914
1190
|
});
|
|
915
1191
|
}
|
|
916
1192
|
|
|
917
|
-
// --- YouTube transcript parsers ---
|
|
918
|
-
|
|
919
|
-
/**
 * Convert a YouTube `json3` caption payload into a plain-text transcript.
 *
 * Each entry of the payload's `events` array that carries non-blank text
 * becomes one output line of the form `[MM:SS] text`, where the timestamp
 * is derived from the event's `tStartMs` (milliseconds, defaulting to 0).
 *
 * @param {string} content - Raw JSON text of the caption track.
 * @returns {string|null} Newline-joined transcript lines, or `null` when the
 *   payload cannot be parsed or walked (any exception is treated as "no result").
 */
function parseJson3(content) {
  const twoDigits = (value) => String(value).padStart(2, '0');

  try {
    const payload = JSON.parse(content);
    const cues = payload.events || [];
    const transcript = [];

    for (const cue of cues) {
      const pieces = cue.segs || [];
      // Events without segments (or with only whitespace) carry no caption text.
      const caption = pieces.length
        ? pieces.map((piece) => piece.utf8 || '').join('').trim()
        : '';
      if (!caption) continue;

      const totalSeconds = Math.floor((cue.tStartMs || 0) / 1000);
      const minutes = Math.floor(totalSeconds / 60);
      const seconds = totalSeconds % 60;
      transcript.push(`[${twoDigits(minutes)}:${twoDigits(seconds)}] ${caption}`);
    }

    return transcript.join('\n');
  } catch (e) {
    // Malformed JSON or an unexpected payload shape: signal failure to the caller.
    return null;
  }
}
|
|
940
|
-
|
|
941
|
-
/**
 * Convert a WebVTT caption document into a plain-text transcript.
 *
 * Blank lines, the `WEBVTT` header, `Kind:` / `Language:` metadata lines and
 * lines beginning with `NOTE` are skipped. A cue timing line (`start --> end`)
 * records the start time via `formatVttTs`; that timestamp is prefixed as
 * `[ts] ` onto the first text line that follows, and any further text lines
 * before the next timing line are emitted without a prefix.
 *
 * @param {string} content - Raw WebVTT text.
 * @returns {string} Newline-joined transcript lines (empty string if none).
 */
function parseVtt(content) {
  // Decode the HTML entities that appear in caption text. `&amp;` is handled
  // first, so doubly-escaped input such as `&amp;#39;` also resolves to `'`.
  const decodeEntities = (value) => value
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|apos);/g, "'");

  const isHeaderLine = (line) => line === 'WEBVTT'
    || line.startsWith('Kind:')
    || line.startsWith('Language:')
    || line.startsWith('NOTE');

  const result = [];
  let currentTimestamp = '';

  for (const line of content.split('\n')) {
    const stripped = line.trim();
    if (!stripped || isHeaderLine(stripped)) continue;

    if (stripped.includes(' --> ')) {
      const [start] = stripped.split(' --> ');
      if (start) currentTimestamp = formatVttTs(start.trim());
      continue;
    }

    // Strip inline markup tags (<c>, <00:00:01.000>, ...) before decoding entities,
    // so an escaped "&lt;b&gt;" survives as literal text.
    const text = decodeEntities(stripped.replace(/<[^>]+>/g, '')).trim();
    if (!text) continue;

    if (currentTimestamp) {
      result.push(`[${currentTimestamp}] ${text}`);
      currentTimestamp = ''; // only the first line of a cue carries the timestamp
    } else {
      result.push(text);
    }
  }

  return result.join('\n');
}
|
|
959
|
-
|
|
960
|
-
/**
 * Convert an XML caption document (`<text start="...">...</text>` elements)
 * into a plain-text transcript.
 *
 * Every `<text>` element with non-blank content becomes one `[MM:SS] text`
 * line; the timestamp comes from the element's `start` attribute (seconds,
 * defaulting to 0 when it is not numeric). Nested markup is removed and HTML
 * entities are decoded.
 *
 * @param {string} content - Raw XML text of the caption track.
 * @returns {string} Newline-joined transcript lines (empty string if none).
 */
function parseXml(content) {
  // `&amp;` is decoded first, so doubly-escaped input such as `&amp;#39;`
  // also resolves to `'`.
  const decodeEntities = (value) => value
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|apos);/g, "'");
  const twoDigits = (value) => String(value).padStart(2, '0');

  const lines = [];
  const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
  let match;
  while ((match = regex.exec(content)) !== null) {
    const startSec = parseFloat(match[1]) || 0;
    // Remove nested tags before decoding so escaped "&lt;b&gt;" stays as text.
    const text = decodeEntities(match[2].replace(/<[^>]+>/g, '')).trim();
    if (!text) continue;
    const mm = Math.floor(startSec / 60);
    const ss = Math.floor(startSec % 60);
    lines.push(`[${twoDigits(mm)}:${twoDigits(ss)}] ${text}`);
  }
  return lines.join('\n');
}
|
|
974
|
-
|
|
975
|
-
/**
 * Shorten a WebVTT timestamp to `MM:SS`.
 *
 * - `HH:MM:SS.mmm` -> hours are folded into minutes (`01:02:03.456` -> `62:03`).
 * - `MM:SS.mmm`    -> minutes are zero-padded to two digits (`3:09.5` -> `03:09`).
 * - anything else  -> returned unchanged.
 *
 * Non-numeric hour/minute fields are treated as 0 in both forms; the
 * fractional part of the seconds field is dropped.
 *
 * @param {string} ts - Timestamp text taken from a cue timing line.
 * @returns {string} The shortened timestamp, or `ts` itself if it has no `:`.
 */
function formatVttTs(ts) {
  // Explicit radix; fall back to 0 so a malformed field never renders as "NaN".
  const toInt = (value) => Number.parseInt(value, 10) || 0;
  const wholeSeconds = (value) => (value || '00').split('.')[0];

  const parts = ts.split(':');
  if (parts.length >= 3) {
    const totalMin = toInt(parts[0]) * 60 + toInt(parts[1]);
    return `${String(totalMin).padStart(2, '0')}:${wholeSeconds(parts[2])}`;
  }
  if (parts.length === 2) {
    return `${String(toInt(parts[0])).padStart(2, '0')}:${wholeSeconds(parts[1])}`;
  }
  return ts;
}
|
|
988
|
-
|
|
989
1193
|
app.get('/health', (req, res) => {
|
|
990
1194
|
if (healthState.isRecovering) {
|
|
991
1195
|
return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
|
|
@@ -1011,33 +1215,42 @@ app.post('/tabs', async (req, res) => {
|
|
|
1011
1215
|
return res.status(400).json({ error: 'userId and sessionKey required' });
|
|
1012
1216
|
}
|
|
1013
1217
|
|
|
1014
|
-
const
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
const
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1218
|
+
const result = await withTimeout((async () => {
|
|
1219
|
+
const session = await getSession(userId);
|
|
1220
|
+
|
|
1221
|
+
let totalTabs = 0;
|
|
1222
|
+
for (const group of session.tabGroups.values()) totalTabs += group.size;
|
|
1223
|
+
if (totalTabs >= MAX_TABS_PER_SESSION) {
|
|
1224
|
+
throw Object.assign(new Error('Maximum tabs per session reached'), { statusCode: 429 });
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
|
|
1228
|
+
throw Object.assign(new Error('Maximum global tabs reached'), { statusCode: 429 });
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
const group = getTabGroup(session, resolvedSessionKey);
|
|
1232
|
+
|
|
1233
|
+
const page = await session.context.newPage();
|
|
1234
|
+
const tabId = crypto.randomUUID();
|
|
1235
|
+
const tabState = createTabState(page);
|
|
1236
|
+
attachDownloadListener(tabState, tabId);
|
|
1237
|
+
group.set(tabId, tabState);
|
|
1238
|
+
|
|
1239
|
+
if (url) {
|
|
1240
|
+
const urlErr = validateUrl(url);
|
|
1241
|
+
if (urlErr) throw Object.assign(new Error(urlErr), { statusCode: 400 });
|
|
1242
|
+
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
1243
|
+
tabState.visitedUrls.add(url);
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1246
|
+
log('info', 'tab created', { reqId: req.reqId, tabId, userId, sessionKey: resolvedSessionKey, url: page.url() });
|
|
1247
|
+
return { tabId, url: page.url() };
|
|
1248
|
+
})(), HANDLER_TIMEOUT_MS, 'tab create');
|
|
1249
|
+
|
|
1250
|
+
res.json(result);
|
|
1038
1251
|
} catch (err) {
|
|
1039
1252
|
log('error', 'tab create failed', { reqId: req.reqId, error: err.message });
|
|
1040
|
-
|
|
1253
|
+
handleRouteError(err, req, res);
|
|
1041
1254
|
}
|
|
1042
1255
|
});
|
|
1043
1256
|
|
|
@@ -1079,7 +1292,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1079
1292
|
const group = getTabGroup(session, resolvedSessionKey);
|
|
1080
1293
|
if (oldestGroup) oldestGroup.delete(oldestTabId);
|
|
1081
1294
|
group.set(tabId, tabState);
|
|
1082
|
-
tabLocks.delete(oldestTabId);
|
|
1295
|
+
{ const _l = tabLocks.get(oldestTabId); if (_l) _l.drain(); tabLocks.delete(oldestTabId); }
|
|
1083
1296
|
log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
|
|
1084
1297
|
} else {
|
|
1085
1298
|
throw new Error('Maximum tabs per session reached');
|
|
@@ -1087,6 +1300,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1087
1300
|
} else {
|
|
1088
1301
|
const page = await session.context.newPage();
|
|
1089
1302
|
tabState = createTabState(page);
|
|
1303
|
+
attachDownloadListener(tabState, tabId, log);
|
|
1090
1304
|
const group = getTabGroup(session, resolvedSessionKey);
|
|
1091
1305
|
group.set(tabId, tabState);
|
|
1092
1306
|
log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
|
|
@@ -1094,7 +1308,7 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1094
1308
|
} else {
|
|
1095
1309
|
tabState = found.tabState;
|
|
1096
1310
|
}
|
|
1097
|
-
tabState.toolCalls++;
|
|
1311
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1098
1312
|
|
|
1099
1313
|
let targetUrl = url;
|
|
1100
1314
|
if (macro) {
|
|
@@ -1110,6 +1324,15 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1110
1324
|
await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
1111
1325
|
tabState.visitedUrls.add(targetUrl);
|
|
1112
1326
|
tabState.lastSnapshot = null;
|
|
1327
|
+
|
|
1328
|
+
// For Google SERP: skip eager ref building during navigate.
|
|
1329
|
+
// Results render asynchronously after DOMContentLoaded — the snapshot
|
|
1330
|
+
// call will wait for and extract them.
|
|
1331
|
+
if (isGoogleSerp(tabState.page.url())) {
|
|
1332
|
+
tabState.refs = new Map();
|
|
1333
|
+
return { ok: true, tabId, url: tabState.page.url(), refsAvailable: false, googleSerp: true };
|
|
1334
|
+
}
|
|
1335
|
+
|
|
1113
1336
|
tabState.refs = await buildRefs(tabState.page);
|
|
1114
1337
|
return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
|
|
1115
1338
|
});
|
|
@@ -1120,7 +1343,10 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
1120
1343
|
} catch (err) {
|
|
1121
1344
|
log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
|
|
1122
1345
|
const status = err.message && err.message.startsWith('Blocked URL scheme') ? 400 : 500;
|
|
1123
|
-
|
|
1346
|
+
if (status === 400) {
|
|
1347
|
+
return res.status(400).json({ error: safeError(err) });
|
|
1348
|
+
}
|
|
1349
|
+
handleRouteError(err, req, res);
|
|
1124
1350
|
}
|
|
1125
1351
|
});
|
|
1126
1352
|
|
|
@@ -1136,7 +1362,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
1136
1362
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1137
1363
|
|
|
1138
1364
|
const { tabState } = found;
|
|
1139
|
-
tabState.toolCalls++;
|
|
1365
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1140
1366
|
|
|
1141
1367
|
// Cached chunk retrieval for offset>0 requests
|
|
1142
1368
|
if (offset > 0 && tabState.lastSnapshot) {
|
|
@@ -1151,6 +1377,31 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
1151
1377
|
}
|
|
1152
1378
|
|
|
1153
1379
|
const result = await withUserLimit(userId, () => withTimeout((async () => {
|
|
1380
|
+
const pageUrl = tabState.page.url();
|
|
1381
|
+
|
|
1382
|
+
// Google SERP fast path — DOM extraction instead of ariaSnapshot
|
|
1383
|
+
if (isGoogleSerp(pageUrl)) {
|
|
1384
|
+
const { refs: googleRefs, snapshot: googleSnapshot } = await extractGoogleSerp(tabState.page);
|
|
1385
|
+
tabState.refs = googleRefs;
|
|
1386
|
+
tabState.lastSnapshot = googleSnapshot;
|
|
1387
|
+
const annotatedYaml = googleSnapshot;
|
|
1388
|
+
const win = windowSnapshot(annotatedYaml, 0);
|
|
1389
|
+
const response = {
|
|
1390
|
+
url: pageUrl,
|
|
1391
|
+
snapshot: win.text,
|
|
1392
|
+
refsCount: tabState.refs.size,
|
|
1393
|
+
truncated: win.truncated,
|
|
1394
|
+
totalChars: win.totalChars,
|
|
1395
|
+
hasMore: win.hasMore,
|
|
1396
|
+
nextOffset: win.nextOffset,
|
|
1397
|
+
};
|
|
1398
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1399
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1400
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1401
|
+
}
|
|
1402
|
+
return response;
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1154
1405
|
tabState.refs = await buildRefs(tabState.page);
|
|
1155
1406
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
1156
1407
|
|
|
@@ -1213,7 +1464,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
1213
1464
|
res.json(result);
|
|
1214
1465
|
} catch (err) {
|
|
1215
1466
|
log('error', 'snapshot failed', { reqId: req.reqId, tabId: req.params.tabId, error: err.message });
|
|
1216
|
-
|
|
1467
|
+
handleRouteError(err, req, res);
|
|
1217
1468
|
}
|
|
1218
1469
|
});
|
|
1219
1470
|
|
|
@@ -1231,7 +1482,7 @@ app.post('/tabs/:tabId/wait', async (req, res) => {
|
|
|
1231
1482
|
res.json({ ok: true, ready });
|
|
1232
1483
|
} catch (err) {
|
|
1233
1484
|
log('error', 'wait failed', { reqId: req.reqId, error: err.message });
|
|
1234
|
-
|
|
1485
|
+
handleRouteError(err, req, res);
|
|
1235
1486
|
}
|
|
1236
1487
|
});
|
|
1237
1488
|
|
|
@@ -1247,13 +1498,15 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
1247
1498
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1248
1499
|
|
|
1249
1500
|
const { tabState } = found;
|
|
1250
|
-
tabState.toolCalls++;
|
|
1501
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1251
1502
|
|
|
1252
1503
|
if (!ref && !selector) {
|
|
1253
1504
|
return res.status(400).json({ error: 'ref or selector required' });
|
|
1254
1505
|
}
|
|
1255
1506
|
|
|
1256
|
-
const result = await withUserLimit(userId, () =>
|
|
1507
|
+
const result = await withUserLimit(userId, () => withTabLock(tabId, async () => {
|
|
1508
|
+
const clickStart = Date.now();
|
|
1509
|
+
const remainingBudget = () => Math.max(0, HANDLER_TIMEOUT_MS - 2000 - (Date.now() - clickStart));
|
|
1257
1510
|
// Full mouse event sequence for stubborn JS click handlers (mirrors Swift WebView.swift)
|
|
1258
1511
|
// Dispatches: mouseover → mouseenter → mousedown → mouseup → click
|
|
1259
1512
|
const dispatchMouseSequence = async (locator) => {
|
|
@@ -1275,18 +1528,32 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
1275
1528
|
log('info', 'mouse sequence dispatched', { x: x.toFixed(0), y: y.toFixed(0) });
|
|
1276
1529
|
};
|
|
1277
1530
|
|
|
1531
|
+
// On Google SERPs, skip the normal click attempt (always intercepted by overlays)
|
|
1532
|
+
// and go directly to force click — saves 5s timeout per click
|
|
1533
|
+
const onGoogleSerp = isGoogleSerp(tabState.page.url());
|
|
1534
|
+
|
|
1278
1535
|
const doClick = async (locatorOrSelector, isLocator) => {
|
|
1279
1536
|
const locator = isLocator ? locatorOrSelector : tabState.page.locator(locatorOrSelector);
|
|
1280
1537
|
|
|
1538
|
+
if (onGoogleSerp) {
|
|
1539
|
+
try {
|
|
1540
|
+
await locator.click({ timeout: 3000, force: true });
|
|
1541
|
+
} catch (forceErr) {
|
|
1542
|
+
log('warn', 'google force click failed, trying mouse sequence');
|
|
1543
|
+
await dispatchMouseSequence(locator);
|
|
1544
|
+
}
|
|
1545
|
+
return;
|
|
1546
|
+
}
|
|
1547
|
+
|
|
1281
1548
|
try {
|
|
1282
1549
|
// First try normal click (respects visibility, enabled, not-obscured)
|
|
1283
|
-
await locator.click({ timeout:
|
|
1550
|
+
await locator.click({ timeout: 3000 });
|
|
1284
1551
|
} catch (err) {
|
|
1285
1552
|
// Fallback 1: If intercepted by overlay, retry with force
|
|
1286
1553
|
if (err.message.includes('intercepts pointer events')) {
|
|
1287
1554
|
log('warn', 'click intercepted, retrying with force');
|
|
1288
1555
|
try {
|
|
1289
|
-
await locator.click({ timeout:
|
|
1556
|
+
await locator.click({ timeout: 3000, force: true });
|
|
1290
1557
|
} catch (forceErr) {
|
|
1291
1558
|
// Fallback 2: Full mouse event sequence for stubborn JS handlers
|
|
1292
1559
|
log('warn', 'force click failed, trying mouse sequence');
|
|
@@ -1304,35 +1571,93 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
1304
1571
|
|
|
1305
1572
|
if (ref) {
|
|
1306
1573
|
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1307
|
-
if (!locator
|
|
1308
|
-
//
|
|
1309
|
-
log('info', 'auto-refreshing
|
|
1310
|
-
|
|
1574
|
+
if (!locator) {
|
|
1575
|
+
// Use tight timeout (4s max) to leave budget for click + post-click buildRefs
|
|
1576
|
+
log('info', 'auto-refreshing refs before click', { ref, hadRefs: tabState.refs.size });
|
|
1577
|
+
try {
|
|
1578
|
+
const preClickBudget = Math.min(4000, remainingBudget());
|
|
1579
|
+
const refreshPromise = buildRefs(tabState.page);
|
|
1580
|
+
const refreshBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('pre_click_refs_timeout')), preClickBudget));
|
|
1581
|
+
tabState.refs = await Promise.race([refreshPromise, refreshBudget]);
|
|
1582
|
+
} catch (e) {
|
|
1583
|
+
if (e.message === 'pre_click_refs_timeout' || e.message === 'buildRefs_timeout') {
|
|
1584
|
+
log('warn', 'pre-click buildRefs timed out, proceeding without refresh');
|
|
1585
|
+
} else {
|
|
1586
|
+
throw e;
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1311
1589
|
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1312
1590
|
}
|
|
1313
1591
|
if (!locator) {
|
|
1314
1592
|
const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none';
|
|
1315
|
-
throw new
|
|
1593
|
+
throw new StaleRefsError(ref, maxRef, tabState.refs.size);
|
|
1316
1594
|
}
|
|
1317
1595
|
await doClick(locator, true);
|
|
1318
1596
|
} else {
|
|
1319
1597
|
await doClick(selector, false);
|
|
1320
1598
|
}
|
|
1321
1599
|
|
|
1322
|
-
|
|
1600
|
+
// If clicking on a Google SERP, wait for potential navigation to complete
|
|
1601
|
+
if (onGoogleSerp) {
|
|
1602
|
+
try {
|
|
1603
|
+
await tabState.page.waitForLoadState('domcontentloaded', { timeout: 3000 });
|
|
1604
|
+
} catch {}
|
|
1605
|
+
await tabState.page.waitForTimeout(200);
|
|
1606
|
+
// Skip buildRefs here — SERP clicks typically navigate to a new page,
|
|
1607
|
+
// and the caller always requests /snapshot next which rebuilds refs.
|
|
1608
|
+
tabState.lastSnapshot = null;
|
|
1609
|
+
tabState.refs = new Map();
|
|
1610
|
+
const newUrl = tabState.page.url();
|
|
1611
|
+
tabState.visitedUrls.add(newUrl);
|
|
1612
|
+
return { ok: true, url: newUrl, refsAvailable: false };
|
|
1613
|
+
} else {
|
|
1614
|
+
await tabState.page.waitForTimeout(500);
|
|
1615
|
+
}
|
|
1323
1616
|
tabState.lastSnapshot = null;
|
|
1324
|
-
|
|
1617
|
+
// buildRefs after click — use remaining budget (min 2s) so we don't blow the handler timeout.
|
|
1618
|
+
// If it times out, return without refs (caller's next /snapshot will rebuild them).
|
|
1619
|
+
const postClickBudget = Math.max(2000, remainingBudget());
|
|
1620
|
+
try {
|
|
1621
|
+
const refsPromise = buildRefs(tabState.page);
|
|
1622
|
+
const refsBudget = new Promise((_, reject) => setTimeout(() => reject(new Error('post_click_refs_timeout')), postClickBudget));
|
|
1623
|
+
tabState.refs = await Promise.race([refsPromise, refsBudget]);
|
|
1624
|
+
} catch (e) {
|
|
1625
|
+
if (e.message === 'post_click_refs_timeout' || e.message === 'buildRefs_timeout') {
|
|
1626
|
+
log('warn', 'post-click buildRefs timed out, returning without refs', { budget: postClickBudget, elapsed: Date.now() - clickStart });
|
|
1627
|
+
tabState.refs = new Map();
|
|
1628
|
+
} else {
|
|
1629
|
+
throw e;
|
|
1630
|
+
}
|
|
1631
|
+
}
|
|
1325
1632
|
|
|
1326
1633
|
const newUrl = tabState.page.url();
|
|
1327
1634
|
tabState.visitedUrls.add(newUrl);
|
|
1328
1635
|
return { ok: true, url: newUrl, refsAvailable: tabState.refs.size > 0 };
|
|
1329
|
-
})
|
|
1636
|
+
}));
|
|
1330
1637
|
|
|
1331
1638
|
log('info', 'clicked', { reqId: req.reqId, tabId, url: result.url });
|
|
1332
1639
|
res.json(result);
|
|
1333
1640
|
} catch (err) {
|
|
1334
1641
|
log('error', 'click failed', { reqId: req.reqId, tabId, error: err.message });
|
|
1335
|
-
|
|
1642
|
+
if (err.message?.includes('timed out')) {
|
|
1643
|
+
try {
|
|
1644
|
+
const session = sessions.get(normalizeUserId(req.body.userId));
|
|
1645
|
+
const found = session && findTab(session, tabId);
|
|
1646
|
+
if (found?.tabState?.page && !found.tabState.page.isClosed()) {
|
|
1647
|
+
found.tabState.refs = await buildRefs(found.tabState.page);
|
|
1648
|
+
found.tabState.lastSnapshot = null;
|
|
1649
|
+
return res.status(500).json({
|
|
1650
|
+
error: safeError(err),
|
|
1651
|
+
hint: 'The page may have changed. Call snapshot to see the current state and retry.',
|
|
1652
|
+
url: found.tabState.page.url(),
|
|
1653
|
+
refsCount: found.tabState.refs.size,
|
|
1654
|
+
});
|
|
1655
|
+
}
|
|
1656
|
+
} catch (refreshErr) {
|
|
1657
|
+
log('warn', 'post-timeout refresh failed', { error: refreshErr.message });
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
handleRouteError(err, req, res);
|
|
1336
1661
|
}
|
|
1337
1662
|
});
|
|
1338
1663
|
|
|
@@ -1347,7 +1672,7 @@ app.post('/tabs/:tabId/type', async (req, res) => {
|
|
|
1347
1672
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1348
1673
|
|
|
1349
1674
|
const { tabState } = found;
|
|
1350
|
-
tabState.toolCalls++;
|
|
1675
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1351
1676
|
|
|
1352
1677
|
if (!ref && !selector) {
|
|
1353
1678
|
return res.status(400).json({ error: 'ref or selector required' });
|
|
@@ -1355,8 +1680,13 @@ app.post('/tabs/:tabId/type', async (req, res) => {
|
|
|
1355
1680
|
|
|
1356
1681
|
await withTabLock(tabId, async () => {
|
|
1357
1682
|
if (ref) {
|
|
1358
|
-
|
|
1359
|
-
if (!locator)
|
|
1683
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1684
|
+
if (!locator) {
|
|
1685
|
+
log('info', 'auto-refreshing refs before fill', { ref, hadRefs: tabState.refs.size });
|
|
1686
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
1687
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1688
|
+
}
|
|
1689
|
+
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
1360
1690
|
await locator.fill(text, { timeout: 10000 });
|
|
1361
1691
|
} else {
|
|
1362
1692
|
await tabState.page.fill(selector, text, { timeout: 10000 });
|
|
@@ -1366,7 +1696,25 @@ app.post('/tabs/:tabId/type', async (req, res) => {
|
|
|
1366
1696
|
res.json({ ok: true });
|
|
1367
1697
|
} catch (err) {
|
|
1368
1698
|
log('error', 'type failed', { reqId: req.reqId, error: err.message });
|
|
1369
|
-
|
|
1699
|
+
if (err.message?.includes('timed out') || err.message?.includes('not an <input>')) {
|
|
1700
|
+
try {
|
|
1701
|
+
const session = sessions.get(normalizeUserId(req.body.userId));
|
|
1702
|
+
const found = session && findTab(session, tabId);
|
|
1703
|
+
if (found?.tabState?.page && !found.tabState.page.isClosed()) {
|
|
1704
|
+
found.tabState.refs = await buildRefs(found.tabState.page);
|
|
1705
|
+
found.tabState.lastSnapshot = null;
|
|
1706
|
+
return res.status(500).json({
|
|
1707
|
+
error: safeError(err),
|
|
1708
|
+
hint: 'The page may have changed. Call snapshot to see the current state and retry.',
|
|
1709
|
+
url: found.tabState.page.url(),
|
|
1710
|
+
refsCount: found.tabState.refs.size,
|
|
1711
|
+
});
|
|
1712
|
+
}
|
|
1713
|
+
} catch (refreshErr) {
|
|
1714
|
+
log('warn', 'post-timeout refresh failed', { error: refreshErr.message });
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1717
|
+
handleRouteError(err, req, res);
|
|
1370
1718
|
}
|
|
1371
1719
|
});
|
|
1372
1720
|
|
|
@@ -1381,7 +1729,7 @@ app.post('/tabs/:tabId/press', async (req, res) => {
|
|
|
1381
1729
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1382
1730
|
|
|
1383
1731
|
const { tabState } = found;
|
|
1384
|
-
tabState.toolCalls++;
|
|
1732
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1385
1733
|
|
|
1386
1734
|
await withTabLock(tabId, async () => {
|
|
1387
1735
|
await tabState.page.keyboard.press(key);
|
|
@@ -1390,7 +1738,7 @@ app.post('/tabs/:tabId/press', async (req, res) => {
|
|
|
1390
1738
|
res.json({ ok: true });
|
|
1391
1739
|
} catch (err) {
|
|
1392
1740
|
log('error', 'press failed', { reqId: req.reqId, error: err.message });
|
|
1393
|
-
|
|
1741
|
+
handleRouteError(err, req, res);
|
|
1394
1742
|
}
|
|
1395
1743
|
});
|
|
1396
1744
|
|
|
@@ -1403,7 +1751,7 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
|
|
|
1403
1751
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1404
1752
|
|
|
1405
1753
|
const { tabState } = found;
|
|
1406
|
-
tabState.toolCalls++;
|
|
1754
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1407
1755
|
|
|
1408
1756
|
const delta = direction === 'up' ? -amount : amount;
|
|
1409
1757
|
await tabState.page.mouse.wheel(0, delta);
|
|
@@ -1412,7 +1760,7 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
|
|
|
1412
1760
|
res.json({ ok: true });
|
|
1413
1761
|
} catch (err) {
|
|
1414
1762
|
log('error', 'scroll failed', { reqId: req.reqId, error: err.message });
|
|
1415
|
-
|
|
1763
|
+
handleRouteError(err, req, res);
|
|
1416
1764
|
}
|
|
1417
1765
|
});
|
|
1418
1766
|
|
|
@@ -1427,18 +1775,18 @@ app.post('/tabs/:tabId/back', async (req, res) => {
|
|
|
1427
1775
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1428
1776
|
|
|
1429
1777
|
const { tabState } = found;
|
|
1430
|
-
tabState.toolCalls++;
|
|
1778
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1431
1779
|
|
|
1432
|
-
const result = await
|
|
1780
|
+
const result = await withTabLock(tabId, async () => {
|
|
1433
1781
|
await tabState.page.goBack({ timeout: 10000 });
|
|
1434
1782
|
tabState.refs = await buildRefs(tabState.page);
|
|
1435
1783
|
return { ok: true, url: tabState.page.url() };
|
|
1436
|
-
})
|
|
1784
|
+
});
|
|
1437
1785
|
|
|
1438
1786
|
res.json(result);
|
|
1439
1787
|
} catch (err) {
|
|
1440
1788
|
log('error', 'back failed', { reqId: req.reqId, error: err.message });
|
|
1441
|
-
|
|
1789
|
+
handleRouteError(err, req, res);
|
|
1442
1790
|
}
|
|
1443
1791
|
});
|
|
1444
1792
|
|
|
@@ -1453,18 +1801,18 @@ app.post('/tabs/:tabId/forward', async (req, res) => {
|
|
|
1453
1801
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1454
1802
|
|
|
1455
1803
|
const { tabState } = found;
|
|
1456
|
-
tabState.toolCalls++;
|
|
1804
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1457
1805
|
|
|
1458
|
-
const result = await
|
|
1806
|
+
const result = await withTabLock(tabId, async () => {
|
|
1459
1807
|
await tabState.page.goForward({ timeout: 10000 });
|
|
1460
1808
|
tabState.refs = await buildRefs(tabState.page);
|
|
1461
1809
|
return { ok: true, url: tabState.page.url() };
|
|
1462
|
-
})
|
|
1810
|
+
});
|
|
1463
1811
|
|
|
1464
1812
|
res.json(result);
|
|
1465
1813
|
} catch (err) {
|
|
1466
1814
|
log('error', 'forward failed', { reqId: req.reqId, error: err.message });
|
|
1467
|
-
|
|
1815
|
+
handleRouteError(err, req, res);
|
|
1468
1816
|
}
|
|
1469
1817
|
});
|
|
1470
1818
|
|
|
@@ -1479,18 +1827,18 @@ app.post('/tabs/:tabId/refresh', async (req, res) => {
|
|
|
1479
1827
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
1480
1828
|
|
|
1481
1829
|
const { tabState } = found;
|
|
1482
|
-
tabState.toolCalls++;
|
|
1830
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1483
1831
|
|
|
1484
|
-
const result = await
|
|
1832
|
+
const result = await withTabLock(tabId, async () => {
|
|
1485
1833
|
await tabState.page.reload({ timeout: 30000 });
|
|
1486
1834
|
tabState.refs = await buildRefs(tabState.page);
|
|
1487
1835
|
return { ok: true, url: tabState.page.url() };
|
|
1488
|
-
})
|
|
1836
|
+
});
|
|
1489
1837
|
|
|
1490
1838
|
res.json(result);
|
|
1491
1839
|
} catch (err) {
|
|
1492
1840
|
log('error', 'refresh failed', { reqId: req.reqId, error: err.message });
|
|
1493
|
-
|
|
1841
|
+
handleRouteError(err, req, res);
|
|
1494
1842
|
}
|
|
1495
1843
|
});
|
|
1496
1844
|
|
|
@@ -1508,7 +1856,7 @@ app.get('/tabs/:tabId/links', async (req, res) => {
|
|
|
1508
1856
|
}
|
|
1509
1857
|
|
|
1510
1858
|
const { tabState } = found;
|
|
1511
|
-
tabState.toolCalls++;
|
|
1859
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1512
1860
|
|
|
1513
1861
|
const allLinks = await tabState.page.evaluate(() => {
|
|
1514
1862
|
const links = [];
|
|
@@ -1531,6 +1879,59 @@ app.get('/tabs/:tabId/links', async (req, res) => {
|
|
|
1531
1879
|
});
|
|
1532
1880
|
} catch (err) {
|
|
1533
1881
|
log('error', 'links failed', { reqId: req.reqId, error: err.message });
|
|
1882
|
+
handleRouteError(err, req, res);
|
|
1883
|
+
}
|
|
1884
|
+
});
|
|
1885
|
+
|
|
1886
|
+
// Get captured downloads
|
|
1887
|
+
// GET /tabs/:tabId/downloads
// Query: userId, includeData ('true' to request inline data), consume ('true' to clear
// the tab's downloads after listing), maxBytes (positive number; otherwise the
// MAX_DOWNLOAD_INLINE_BYTES value is used).
// Responds with { tabId, downloads }, 404 when the tab is unknown, 500 on failure.
// NOTE(review): a caller-supplied maxBytes is passed through as-is and is not clamped
// to MAX_DOWNLOAD_INLINE_BYTES here — confirm whether getDownloadsList enforces a ceiling.
app.get('/tabs/:tabId/downloads', async (req, res) => {
  try {
    const query = req.query;
    const wantsData = query.includeData === 'true';
    const shouldConsume = query.consume === 'true';

    const requestedBytes = Number(query.maxBytes);
    const hasValidLimit = Number.isFinite(requestedBytes) && requestedBytes > 0;
    const byteLimit = hasValidLimit ? requestedBytes : MAX_DOWNLOAD_INLINE_BYTES;

    const session = sessions.get(normalizeUserId(query.userId));
    const found = session && findTab(session, req.params.tabId);
    if (!found) {
      return res.status(404).json({ error: 'Tab not found' });
    }

    const tab = found.tabState;
    tab.toolCalls++;

    // List first, then (optionally) clear, so the response still contains
    // the entries that are being consumed.
    const downloads = await getDownloadsList(tab, { includeData: wantsData, maxBytes: byteLimit });
    if (shouldConsume) {
      await clearTabDownloads(tab);
    }

    res.json({ tabId: req.params.tabId, downloads });
  } catch (err) {
    log('error', 'downloads failed', { reqId: req.reqId, error: err.message });
    res.status(500).json({ error: safeError(err) });
  }
});
|
|
1913
|
+
|
|
1914
|
+
// Get image elements from current page
|
|
1915
|
+
// GET /tabs/:tabId/images
// Query: userId, includeData ('true' to request inline data), maxBytes (positive number;
// otherwise the MAX_DOWNLOAD_INLINE_BYTES value is used), limit (positive number, floored
// and capped at 20; defaults to 8).
// Responds with { tabId, images }, 404 when the tab is unknown, 500 on failure.
app.get('/tabs/:tabId/images', async (req, res) => {
  try {
    const query = req.query;
    const wantsData = query.includeData === 'true';

    const requestedBytes = Number(query.maxBytes);
    const requestedLimit = Number(query.limit);

    let byteLimit = MAX_DOWNLOAD_INLINE_BYTES;
    if (Number.isFinite(requestedBytes) && requestedBytes > 0) {
      byteLimit = requestedBytes;
    }

    let imageLimit = 8;
    if (Number.isFinite(requestedLimit) && requestedLimit > 0) {
      imageLimit = Math.min(Math.floor(requestedLimit), 20);
    }

    const session = sessions.get(normalizeUserId(query.userId));
    const found = session && findTab(session, req.params.tabId);
    if (!found) {
      return res.status(404).json({ error: 'Tab not found' });
    }

    const tab = found.tabState;
    tab.toolCalls++;

    const images = await extractPageImages(tab.page, {
      includeData: wantsData,
      maxBytes: byteLimit,
      limit: imageLimit,
    });

    res.json({ tabId: req.params.tabId, images });
  } catch (err) {
    log('error', 'images failed', { reqId: req.reqId, error: err.message });
    res.status(500).json({ error: safeError(err) });
  }
});
|
|
@@ -1550,7 +1951,7 @@ app.get('/tabs/:tabId/screenshot', async (req, res) => {
|
|
|
1550
1951
|
res.send(buffer);
|
|
1551
1952
|
} catch (err) {
|
|
1552
1953
|
log('error', 'screenshot failed', { reqId: req.reqId, error: err.message });
|
|
1553
|
-
|
|
1954
|
+
handleRouteError(err, req, res);
|
|
1554
1955
|
}
|
|
1555
1956
|
});
|
|
1556
1957
|
|
|
@@ -1569,11 +1970,36 @@ app.get('/tabs/:tabId/stats', async (req, res) => {
|
|
|
1569
1970
|
listItemId, // Legacy compatibility
|
|
1570
1971
|
url: tabState.page.url(),
|
|
1571
1972
|
visitedUrls: Array.from(tabState.visitedUrls),
|
|
1973
|
+
downloadsCount: Array.isArray(tabState.downloads) ? tabState.downloads.length : 0,
|
|
1572
1974
|
toolCalls: tabState.toolCalls,
|
|
1573
1975
|
refsCount: tabState.refs.size
|
|
1574
1976
|
});
|
|
1575
1977
|
} catch (err) {
|
|
1576
1978
|
log('error', 'stats failed', { reqId: req.reqId, error: err.message });
|
|
1979
|
+
handleRouteError(err, req, res);
|
|
1980
|
+
}
|
|
1981
|
+
});
|
|
1982
|
+
|
|
1983
|
+
// Evaluate JavaScript in page context.
//
// JSON body (up to 1mb): { userId, expression }
//   userId     - required; identifies the session that owns the tab
//   expression - required, non-empty string handed to page.evaluate()
//
// Responds with { ok: true, result } on success, 400 for a missing/invalid
// body field, 404 when the session or tab is not found, and 500 with a
// sanitized message when evaluation throws.
app.post('/tabs/:tabId/evaluate', express.json({ limit: '1mb' }), async (req, res) => {
  try {
    // Depending on the body parser, req.body may be undefined when no JSON
    // payload was sent; fall back to {} so the caller gets a 400, not a 500.
    const { userId, expression } = req.body ?? {};
    if (!userId) return res.status(400).json({ error: 'userId is required' });
    if (!expression) return res.status(400).json({ error: 'expression is required' });
    // Reject non-string payloads up front rather than letting them fail
    // inside the page as an opaque evaluation error.
    if (typeof expression !== 'string') {
      return res.status(400).json({ error: 'expression must be a string' });
    }

    const session = sessions.get(normalizeUserId(userId));
    const found = session && findTab(session, req.params.tabId);
    if (!found) return res.status(404).json({ error: 'Tab not found' });

    // Mark the session and the tab as active.
    session.lastAccess = Date.now();
    const { tabState } = found;
    tabState.toolCalls++;
    tabState.consecutiveTimeouts = 0;

    const result = await tabState.page.evaluate(expression);
    log('info', 'evaluate', { reqId: req.reqId, tabId: req.params.tabId, userId, resultType: typeof result });
    res.json({ ok: true, result });
  } catch (err) {
    log('error', 'evaluate failed', { reqId: req.reqId, error: err.message });
    res.status(500).json({ error: safeError(err) });
  }
});
|
|
@@ -1585,9 +2011,10 @@ app.delete('/tabs/:tabId', async (req, res) => {
|
|
|
1585
2011
|
const session = sessions.get(normalizeUserId(userId));
|
|
1586
2012
|
const found = session && findTab(session, req.params.tabId);
|
|
1587
2013
|
if (found) {
|
|
2014
|
+
await clearTabDownloads(found.tabState);
|
|
1588
2015
|
await safePageClose(found.tabState.page);
|
|
1589
2016
|
found.group.delete(req.params.tabId);
|
|
1590
|
-
tabLocks.delete(req.params.tabId);
|
|
2017
|
+
{ const _l = tabLocks.get(req.params.tabId); if (_l) _l.drain(); tabLocks.delete(req.params.tabId); }
|
|
1591
2018
|
if (found.group.size === 0) {
|
|
1592
2019
|
session.tabGroups.delete(found.listItemId);
|
|
1593
2020
|
}
|
|
@@ -1596,7 +2023,7 @@ app.delete('/tabs/:tabId', async (req, res) => {
|
|
|
1596
2023
|
res.json({ ok: true });
|
|
1597
2024
|
} catch (err) {
|
|
1598
2025
|
log('error', 'tab close failed', { reqId: req.reqId, error: err.message });
|
|
1599
|
-
|
|
2026
|
+
handleRouteError(err, req, res);
|
|
1600
2027
|
}
|
|
1601
2028
|
});
|
|
1602
2029
|
|
|
@@ -1608,6 +2035,7 @@ app.delete('/tabs/group/:listItemId', async (req, res) => {
|
|
|
1608
2035
|
const group = session?.tabGroups.get(req.params.listItemId);
|
|
1609
2036
|
if (group) {
|
|
1610
2037
|
for (const [tabId, tabState] of group) {
|
|
2038
|
+
await clearTabDownloads(tabState);
|
|
1611
2039
|
await safePageClose(tabState.page);
|
|
1612
2040
|
tabLocks.delete(tabId);
|
|
1613
2041
|
}
|
|
@@ -1617,7 +2045,7 @@ app.delete('/tabs/group/:listItemId', async (req, res) => {
|
|
|
1617
2045
|
res.json({ ok: true });
|
|
1618
2046
|
} catch (err) {
|
|
1619
2047
|
log('error', 'tab group close failed', { reqId: req.reqId, error: err.message });
|
|
1620
|
-
|
|
2048
|
+
handleRouteError(err, req, res);
|
|
1621
2049
|
}
|
|
1622
2050
|
});
|
|
1623
2051
|
|
|
@@ -1627,6 +2055,7 @@ app.delete('/sessions/:userId', async (req, res) => {
|
|
|
1627
2055
|
const userId = normalizeUserId(req.params.userId);
|
|
1628
2056
|
const session = sessions.get(userId);
|
|
1629
2057
|
if (session) {
|
|
2058
|
+
await clearSessionDownloads(session);
|
|
1630
2059
|
await session.context.close();
|
|
1631
2060
|
sessions.delete(userId);
|
|
1632
2061
|
log('info', 'session closed', { userId });
|
|
@@ -1635,7 +2064,7 @@ app.delete('/sessions/:userId', async (req, res) => {
|
|
|
1635
2064
|
res.json({ ok: true });
|
|
1636
2065
|
} catch (err) {
|
|
1637
2066
|
log('error', 'session close failed', { error: err.message });
|
|
1638
|
-
|
|
2067
|
+
handleRouteError(err, req, res);
|
|
1639
2068
|
}
|
|
1640
2069
|
});
|
|
1641
2070
|
|
|
@@ -1644,6 +2073,7 @@ setInterval(() => {
|
|
|
1644
2073
|
const now = Date.now();
|
|
1645
2074
|
for (const [userId, session] of sessions) {
|
|
1646
2075
|
if (now - session.lastAccess > SESSION_TIMEOUT_MS) {
|
|
2076
|
+
clearSessionDownloads(session).catch(() => {});
|
|
1647
2077
|
session.context.close().catch(() => {});
|
|
1648
2078
|
sessions.delete(userId);
|
|
1649
2079
|
log('info', 'session expired', { userId });
|
|
@@ -1655,6 +2085,37 @@ setInterval(() => {
|
|
|
1655
2085
|
}
|
|
1656
2086
|
}, 60_000);
|
|
1657
2087
|
|
|
2088
|
+
// Per-tab inactivity reaper — close tabs idle for TAB_INACTIVITY_MS.
//
// Runs every 60s. A tab counts as idle while its toolCalls counter has not
// changed since the reaper last recorded it. The first time a tab is seen it
// is only stamped; any change in toolCalls restarts the idle window.
// Reaped tabs have their downloads cleared and their page closed, are removed
// from their group, and have their tab lock drained and dropped. Groups left
// empty are removed from the session.
setInterval(() => {
  const now = Date.now();
  for (const [userId, session] of sessions) {
    for (const [listItemId, group] of session.tabGroups) {
      for (const [tabId, tabState] of group) {
        if (!tabState._lastReaperCheck) {
          // First sighting: record a baseline and decide on a later pass.
          tabState._lastReaperCheck = now;
          tabState._lastReaperToolCalls = tabState.toolCalls;
          continue;
        }

        if (tabState.toolCalls !== tabState._lastReaperToolCalls) {
          // Activity since the last pass: restart the idle window.
          tabState._lastReaperCheck = now;
          tabState._lastReaperToolCalls = tabState.toolCalls;
          continue;
        }

        const idleMs = now - tabState._lastReaperCheck;
        if (idleMs < TAB_INACTIVITY_MS) continue;

        log('info', 'tab reaped (inactive)', { userId, tabId, listItemId, idleMs, toolCalls: tabState.toolCalls });

        // Clear the tab's downloads before closing its page, as the explicit
        // DELETE /tabs/:tabId route does; otherwise they are never released.
        // Runs in the background so one slow tab does not stall the sweep;
        // failures are logged instead of becoming unhandled rejections.
        (async () => {
          try {
            await clearTabDownloads(tabState);
          } catch (err) {
            log('warn', 'tab reaper download cleanup failed', { userId, tabId, error: err.message });
          }
          await safePageClose(tabState.page);
        })().catch((err) => {
          log('warn', 'tab reaper page close failed', { userId, tabId, error: err.message });
        });

        // Deleting the current entry while iterating a Map is well-defined.
        group.delete(tabId);
        const lock = tabLocks.get(tabId);
        if (lock) lock.drain();
        tabLocks.delete(tabId);
      }
      if (group.size === 0) {
        session.tabGroups.delete(listItemId);
      }
    }
  }
}, 60_000);
|
|
2118
|
+
|
|
1658
2119
|
// =============================================================================
|
|
1659
2120
|
// OpenClaw-compatible endpoint aliases
|
|
1660
2121
|
// These allow camoufox to be used as a profile backend for OpenClaw's browser tool
|
|
@@ -1699,7 +2160,7 @@ app.get('/tabs', async (req, res) => {
|
|
|
1699
2160
|
res.json({ running: true, tabs });
|
|
1700
2161
|
} catch (err) {
|
|
1701
2162
|
log('error', 'list tabs failed', { reqId: req.reqId, error: err.message });
|
|
1702
|
-
|
|
2163
|
+
handleRouteError(err, req, res);
|
|
1703
2164
|
}
|
|
1704
2165
|
});
|
|
1705
2166
|
|
|
@@ -1719,6 +2180,11 @@ app.post('/tabs/open', async (req, res) => {
|
|
|
1719
2180
|
|
|
1720
2181
|
const session = await getSession(userId);
|
|
1721
2182
|
|
|
2183
|
+
// Check global tab limit first
|
|
2184
|
+
if (getTotalTabCount() >= MAX_TABS_GLOBAL) {
|
|
2185
|
+
return res.status(429).json({ error: 'Maximum global tabs reached' });
|
|
2186
|
+
}
|
|
2187
|
+
|
|
1722
2188
|
let totalTabs = 0;
|
|
1723
2189
|
for (const g of session.tabGroups.values()) totalTabs += g.size;
|
|
1724
2190
|
if (totalTabs >= MAX_TABS_PER_SESSION) {
|
|
@@ -1730,6 +2196,7 @@ app.post('/tabs/open', async (req, res) => {
|
|
|
1730
2196
|
const page = await session.context.newPage();
|
|
1731
2197
|
const tabId = crypto.randomUUID();
|
|
1732
2198
|
const tabState = createTabState(page);
|
|
2199
|
+
attachDownloadListener(tabState, tabId, log);
|
|
1733
2200
|
group.set(tabId, tabState);
|
|
1734
2201
|
|
|
1735
2202
|
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
@@ -1745,7 +2212,7 @@ app.post('/tabs/open', async (req, res) => {
|
|
|
1745
2212
|
});
|
|
1746
2213
|
} catch (err) {
|
|
1747
2214
|
log('error', 'openclaw tab open failed', { reqId: req.reqId, error: err.message });
|
|
1748
|
-
|
|
2215
|
+
handleRouteError(err, req, res);
|
|
1749
2216
|
}
|
|
1750
2217
|
});
|
|
1751
2218
|
|
|
@@ -1770,6 +2237,11 @@ app.post('/stop', async (req, res) => {
|
|
|
1770
2237
|
await browser.close().catch(() => {});
|
|
1771
2238
|
browser = null;
|
|
1772
2239
|
}
|
|
2240
|
+
const cleanupTasks = [];
|
|
2241
|
+
for (const session of sessions.values()) {
|
|
2242
|
+
cleanupTasks.push(clearSessionDownloads(session));
|
|
2243
|
+
}
|
|
2244
|
+
await Promise.all(cleanupTasks);
|
|
1773
2245
|
sessions.clear();
|
|
1774
2246
|
res.json({ ok: true, stopped: true, profile: 'camoufox' });
|
|
1775
2247
|
} catch (err) {
|
|
@@ -1798,19 +2270,27 @@ app.post('/navigate', async (req, res) => {
|
|
|
1798
2270
|
}
|
|
1799
2271
|
|
|
1800
2272
|
const { tabState } = found;
|
|
1801
|
-
tabState.toolCalls++;
|
|
2273
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1802
2274
|
|
|
1803
|
-
const result = await
|
|
2275
|
+
const result = await withTabLock(targetId, async () => {
|
|
1804
2276
|
await tabState.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
1805
2277
|
tabState.visitedUrls.add(url);
|
|
2278
|
+
tabState.lastSnapshot = null;
|
|
2279
|
+
|
|
2280
|
+
// Google SERP: defer extraction to snapshot call
|
|
2281
|
+
if (isGoogleSerp(tabState.page.url())) {
|
|
2282
|
+
tabState.refs = new Map();
|
|
2283
|
+
return { ok: true, targetId, url: tabState.page.url(), googleSerp: true };
|
|
2284
|
+
}
|
|
2285
|
+
|
|
1806
2286
|
tabState.refs = await buildRefs(tabState.page);
|
|
1807
2287
|
return { ok: true, targetId, url: tabState.page.url() };
|
|
1808
|
-
})
|
|
2288
|
+
});
|
|
1809
2289
|
|
|
1810
2290
|
res.json(result);
|
|
1811
2291
|
} catch (err) {
|
|
1812
2292
|
log('error', 'openclaw navigate failed', { reqId: req.reqId, error: err.message });
|
|
1813
|
-
|
|
2293
|
+
handleRouteError(err, req, res);
|
|
1814
2294
|
}
|
|
1815
2295
|
});
|
|
1816
2296
|
|
|
@@ -1830,7 +2310,7 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1830
2310
|
}
|
|
1831
2311
|
|
|
1832
2312
|
const { tabState } = found;
|
|
1833
|
-
tabState.toolCalls++;
|
|
2313
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1834
2314
|
|
|
1835
2315
|
// Cached chunk retrieval
|
|
1836
2316
|
if (offset > 0 && tabState.lastSnapshot) {
|
|
@@ -1843,6 +2323,28 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1843
2323
|
return res.json(response);
|
|
1844
2324
|
}
|
|
1845
2325
|
|
|
2326
|
+
const pageUrl = tabState.page.url();
|
|
2327
|
+
|
|
2328
|
+
// Google SERP fast path
|
|
2329
|
+
if (isGoogleSerp(pageUrl)) {
|
|
2330
|
+
const { refs: googleRefs, snapshot: googleSnapshot } = await extractGoogleSerp(tabState.page);
|
|
2331
|
+
tabState.refs = googleRefs;
|
|
2332
|
+
tabState.lastSnapshot = googleSnapshot;
|
|
2333
|
+
const annotatedYaml = googleSnapshot;
|
|
2334
|
+
const win = windowSnapshot(annotatedYaml, 0);
|
|
2335
|
+
const response = {
|
|
2336
|
+
ok: true, format: 'aria', targetId, url: pageUrl,
|
|
2337
|
+
snapshot: win.text, refsCount: tabState.refs.size,
|
|
2338
|
+
truncated: win.truncated, totalChars: win.totalChars,
|
|
2339
|
+
hasMore: win.hasMore, nextOffset: win.nextOffset,
|
|
2340
|
+
};
|
|
2341
|
+
if (req.query.includeScreenshot === 'true') {
|
|
2342
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
2343
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
2344
|
+
}
|
|
2345
|
+
return res.json(response);
|
|
2346
|
+
}
|
|
2347
|
+
|
|
1846
2348
|
tabState.refs = await buildRefs(tabState.page);
|
|
1847
2349
|
|
|
1848
2350
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
@@ -1895,7 +2397,7 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1895
2397
|
res.json(response);
|
|
1896
2398
|
} catch (err) {
|
|
1897
2399
|
log('error', 'openclaw snapshot failed', { reqId: req.reqId, error: err.message });
|
|
1898
|
-
|
|
2400
|
+
handleRouteError(err, req, res);
|
|
1899
2401
|
}
|
|
1900
2402
|
});
|
|
1901
2403
|
|
|
@@ -1919,9 +2421,9 @@ app.post('/act', async (req, res) => {
|
|
|
1919
2421
|
}
|
|
1920
2422
|
|
|
1921
2423
|
const { tabState } = found;
|
|
1922
|
-
tabState.toolCalls++;
|
|
2424
|
+
tabState.toolCalls++; tabState.consecutiveTimeouts = 0;
|
|
1923
2425
|
|
|
1924
|
-
const result = await
|
|
2426
|
+
const result = await withTabLock(targetId, async () => {
|
|
1925
2427
|
switch (kind) {
|
|
1926
2428
|
case 'click': {
|
|
1927
2429
|
const { ref, selector, doubleClick } = params;
|
|
@@ -1931,7 +2433,7 @@ app.post('/act', async (req, res) => {
|
|
|
1931
2433
|
|
|
1932
2434
|
const doClick = async (locatorOrSelector, isLocator) => {
|
|
1933
2435
|
const locator = isLocator ? locatorOrSelector : tabState.page.locator(locatorOrSelector);
|
|
1934
|
-
const clickOpts = { timeout:
|
|
2436
|
+
const clickOpts = { timeout: 3000 };
|
|
1935
2437
|
if (doubleClick) clickOpts.clickCount = 2;
|
|
1936
2438
|
|
|
1937
2439
|
try {
|
|
@@ -1946,8 +2448,13 @@ app.post('/act', async (req, res) => {
|
|
|
1946
2448
|
};
|
|
1947
2449
|
|
|
1948
2450
|
if (ref) {
|
|
1949
|
-
|
|
1950
|
-
if (!locator)
|
|
2451
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2452
|
+
if (!locator) {
|
|
2453
|
+
log('info', 'auto-refreshing refs before click (openclaw)', { ref, hadRefs: tabState.refs.size });
|
|
2454
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
2455
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2456
|
+
}
|
|
2457
|
+
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
1951
2458
|
await doClick(locator, true);
|
|
1952
2459
|
} else {
|
|
1953
2460
|
await doClick(selector, false);
|
|
@@ -1968,8 +2475,13 @@ app.post('/act', async (req, res) => {
|
|
|
1968
2475
|
}
|
|
1969
2476
|
|
|
1970
2477
|
if (ref) {
|
|
1971
|
-
|
|
1972
|
-
if (!locator)
|
|
2478
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2479
|
+
if (!locator) {
|
|
2480
|
+
log('info', 'auto-refreshing refs before type (openclaw)', { ref, hadRefs: tabState.refs.size });
|
|
2481
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
2482
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2483
|
+
}
|
|
2484
|
+
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
1973
2485
|
await locator.fill(text, { timeout: 10000 });
|
|
1974
2486
|
if (submit) await tabState.page.keyboard.press('Enter');
|
|
1975
2487
|
} else {
|
|
@@ -1990,8 +2502,12 @@ app.post('/act', async (req, res) => {
|
|
|
1990
2502
|
case 'scrollIntoView': {
|
|
1991
2503
|
const { ref, direction = 'down', amount = 500 } = params;
|
|
1992
2504
|
if (ref) {
|
|
1993
|
-
|
|
1994
|
-
if (!locator)
|
|
2505
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2506
|
+
if (!locator) {
|
|
2507
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
2508
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2509
|
+
}
|
|
2510
|
+
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
1995
2511
|
await locator.scrollIntoViewIfNeeded({ timeout: 5000 });
|
|
1996
2512
|
} else {
|
|
1997
2513
|
const delta = direction === 'up' ? -amount : amount;
|
|
@@ -2006,8 +2522,12 @@ app.post('/act', async (req, res) => {
|
|
|
2006
2522
|
if (!ref && !selector) throw new Error('ref or selector required');
|
|
2007
2523
|
|
|
2008
2524
|
if (ref) {
|
|
2009
|
-
|
|
2010
|
-
if (!locator)
|
|
2525
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2526
|
+
if (!locator) {
|
|
2527
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
2528
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
2529
|
+
}
|
|
2530
|
+
if (!locator) { const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none'; throw new StaleRefsError(ref, maxRef, tabState.refs.size); }
|
|
2011
2531
|
await locator.hover({ timeout: 5000 });
|
|
2012
2532
|
} else {
|
|
2013
2533
|
await tabState.page.locator(selector).hover({ timeout: 5000 });
|
|
@@ -2030,19 +2550,19 @@ app.post('/act', async (req, res) => {
|
|
|
2030
2550
|
case 'close': {
|
|
2031
2551
|
await safePageClose(tabState.page);
|
|
2032
2552
|
found.group.delete(targetId);
|
|
2033
|
-
tabLocks.delete(targetId);
|
|
2553
|
+
{ const _l = tabLocks.get(targetId); if (_l) _l.drain(); tabLocks.delete(targetId); }
|
|
2034
2554
|
return { ok: true, targetId };
|
|
2035
2555
|
}
|
|
2036
2556
|
|
|
2037
2557
|
default:
|
|
2038
2558
|
throw new Error(`Unsupported action kind: ${kind}`);
|
|
2039
2559
|
}
|
|
2040
|
-
})
|
|
2560
|
+
});
|
|
2041
2561
|
|
|
2042
2562
|
res.json(result);
|
|
2043
2563
|
} catch (err) {
|
|
2044
2564
|
log('error', 'act failed', { reqId: req.reqId, kind: req.body?.kind, error: err.message });
|
|
2045
|
-
|
|
2565
|
+
handleRouteError(err, req, res);
|
|
2046
2566
|
}
|
|
2047
2567
|
});
|
|
2048
2568
|
|
|
@@ -2068,14 +2588,20 @@ setInterval(() => {
|
|
|
2068
2588
|
// Active health probe — detect hung browser even when isConnected() lies
|
|
2069
2589
|
setInterval(async () => {
|
|
2070
2590
|
if (!browser || healthState.isRecovering) return;
|
|
2071
|
-
|
|
2072
|
-
if
|
|
2591
|
+
const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
|
|
2592
|
+
// Skip probe if operations are in flight AND last success was recent.
|
|
2593
|
+
// If it's been >120s since any successful operation, probe anyway —
|
|
2594
|
+
// active ops are likely stuck on a frozen browser and will time out eventually.
|
|
2595
|
+
if (healthState.activeOps > 0 && timeSinceSuccess < 120000) {
|
|
2073
2596
|
log('info', 'health probe skipped, operations active', { activeOps: healthState.activeOps });
|
|
2074
2597
|
return;
|
|
2075
2598
|
}
|
|
2076
|
-
const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
|
|
2077
2599
|
if (timeSinceSuccess < 120000) return;
|
|
2078
2600
|
|
|
2601
|
+
if (healthState.activeOps > 0) {
|
|
2602
|
+
log('warn', 'health probe forced despite active ops', { activeOps: healthState.activeOps, timeSinceSuccessMs: timeSinceSuccess });
|
|
2603
|
+
}
|
|
2604
|
+
|
|
2079
2605
|
let testContext;
|
|
2080
2606
|
try {
|
|
2081
2607
|
testContext = await browser.newContext();
|
|
@@ -2127,9 +2653,16 @@ process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
|
|
|
2127
2653
|
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
|
|
2128
2654
|
|
|
2129
2655
|
const PORT = CONFIG.port;
|
|
2130
|
-
const server = app.listen(PORT, () => {
|
|
2656
|
+
// Start the HTTP server on PORT. Once it is listening, launch the browser
// eagerly; a failed launch is only logged, and the server keeps running.
const server = app.listen(PORT, async () => {
  log('info', 'server started', { port: PORT, pid: process.pid, nodeVersion: process.version });

  // Pre-warm browser so first request doesn't eat a 6-7s cold start
  const warmupStartedAt = Date.now();
  try {
    await ensureBrowser();
    const elapsedMs = Date.now() - warmupStartedAt;
    log('info', 'browser pre-warmed', { ms: elapsedMs });
  } catch (err) {
    log('error', 'browser pre-warm failed (will retry on first request)', { error: err.message });
  }
});
|
|
2134
2667
|
|
|
2135
2668
|
server.on('error', (err) => {
|