@askjo/camofox-browser 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +6 -0
- package/README.md +27 -1
- package/lib/snapshot.js +41 -0
- package/package.json +6 -3
- package/plugin.ts +16 -5
- package/server.js +545 -38
package/Dockerfile
CHANGED
|
@@ -31,8 +31,14 @@ RUN apt-get update && apt-get install -y \
|
|
|
31
31
|
ca-certificates \
|
|
32
32
|
curl \
|
|
33
33
|
unzip \
|
|
34
|
+
# yt-dlp runtime dependency
|
|
35
|
+
python3-minimal \
|
|
34
36
|
&& rm -rf /var/lib/apt/lists/*
|
|
35
37
|
|
|
38
|
+
# Install yt-dlp for YouTube transcript extraction (no browser needed).
# -f makes curl exit non-zero on an HTTP error, so a 404/5xx error page is
# never written to /usr/local/bin/yt-dlp and the build fails loudly instead.
RUN curl -fL https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp \
    && chmod +x /usr/local/bin/yt-dlp
|
|
41
|
+
|
|
36
42
|
# Pre-bake Camoufox browser binary into image
|
|
37
43
|
# This avoids downloading at runtime and pins the version
|
|
38
44
|
# Note: unzip returns exit code 1 for warnings (Unicode filenames), so we use || true and verify
|
package/README.md
CHANGED
|
@@ -37,9 +37,20 @@ This project wraps that engine in a REST API built for agents: accessibility sna
|
|
|
37
37
|
- **Cookie Import** - inject Netscape-format cookie files for authenticated browsing
|
|
38
38
|
- **Proxy + GeoIP** - route traffic through residential proxies with automatic locale/timezone
|
|
39
39
|
- **Structured Logging** - JSON log lines with request IDs for production observability
|
|
40
|
+
- **YouTube Transcripts** - extract captions from any YouTube video via yt-dlp, no API key needed
|
|
40
41
|
- **Search Macros** - `@google_search`, `@youtube_search`, `@amazon_search`, `@reddit_subreddit`, and 10 more
|
|
42
|
+
- **Snapshot Screenshots** - include a base64 PNG screenshot alongside the accessibility snapshot
|
|
43
|
+
- **Large Page Handling** - automatic snapshot truncation with offset-based pagination
|
|
41
44
|
- **Deploy Anywhere** - Docker, Fly.io, Railway
|
|
42
45
|
|
|
46
|
+
## Optional Dependencies
|
|
47
|
+
|
|
48
|
+
| Dependency | Purpose | Install |
|
|
49
|
+
|-----------|---------|---------|
|
|
50
|
+
| [yt-dlp](https://github.com/yt-dlp/yt-dlp) | YouTube transcript extraction (fast path) | `pip install yt-dlp` or `brew install yt-dlp` |
|
|
51
|
+
|
|
52
|
+
The Docker image includes yt-dlp. For local dev, install it for the `/youtube/transcript` endpoint. Without it, the endpoint falls back to a slower browser-based method.
|
|
53
|
+
|
|
43
54
|
## Quick Start
|
|
44
55
|
|
|
45
56
|
### OpenClaw Plugin
|
|
@@ -252,7 +263,7 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
|
|
|
252
263
|
|
|
253
264
|
| Method | Endpoint | Description |
|
|
254
265
|
|--------|----------|-------------|
|
|
255
|
-
| `GET` | `/tabs/:id/snapshot` | Accessibility snapshot with element refs |
|
|
266
|
+
| `GET` | `/tabs/:id/snapshot` | Accessibility snapshot with element refs. Query params: `includeScreenshot=true` (add base64 PNG), `offset=N` (paginate large snapshots) |
|
|
256
267
|
| `POST` | `/tabs/:id/click` | Click element by ref or CSS selector |
|
|
257
268
|
| `POST` | `/tabs/:id/type` | Type text into element |
|
|
258
269
|
| `POST` | `/tabs/:id/press` | Press a keyboard key |
|
|
@@ -265,6 +276,21 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
|
|
|
265
276
|
| `POST` | `/tabs/:id/forward` | Go forward |
|
|
266
277
|
| `POST` | `/tabs/:id/refresh` | Refresh page |
|
|
267
278
|
|
|
279
|
+
### YouTube Transcript
|
|
280
|
+
|
|
281
|
+
| Method | Endpoint | Description |
|
|
282
|
+
|--------|----------|-------------|
|
|
283
|
+
| `POST` | `/youtube/transcript` | Extract captions from a YouTube video |
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
curl -X POST http://localhost:9377/youtube/transcript \
|
|
287
|
+
-H 'Content-Type: application/json' \
|
|
288
|
+
-d '{"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "languages": ["en"]}'
|
|
289
|
+
# → { "status": "ok", "transcript": "[00:18] ♪ We're no strangers to love ♪\n...", "video_title": "...", "total_words": 548 }
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Uses [yt-dlp](https://github.com/yt-dlp/yt-dlp) when available (fast, no browser needed). Falls back to a browser-based intercept method if yt-dlp is not installed — this is slower and less reliable due to YouTube ad pre-rolls.
|
|
293
|
+
|
|
268
294
|
### Server
|
|
269
295
|
|
|
270
296
|
| Method | Endpoint | Description |
|
package/lib/snapshot.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Snapshot windowing — truncate large accessibility snapshots while
|
|
3
|
+
* preserving pagination/navigation links at the tail.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const MAX_SNAPSHOT_CHARS = 80000; // ~20K tokens
const SNAPSHOT_TAIL_CHARS = 5000; // keep last ~5K for pagination/nav links

/**
 * Return a window of the snapshot YAML.
 *
 * Snapshots of at most MAX_SNAPSHOT_CHARS characters are returned unchanged.
 * Larger snapshots are returned as `chunk + marker + tail`, where `tail` is
 * the last SNAPSHOT_TAIL_CHARS characters of the snapshot, so the trailing
 * pagination/nav links are present in every window.
 *
 * @param {string} yaml - Full snapshot text; empty/nullish yields an empty result.
 * @param {number} [offset=0] - Character offset where the chunk starts. It is
 *   coerced to a number, truncated to an integer and clamped to
 *   [0, total - SNAPSHOT_TAIL_CHARS]; non-numeric values are treated as 0.
 * @returns {{text: string, truncated: boolean, totalChars: number, offset: number,
 *   hasMore?: boolean, nextOffset?: (number|null)}} `hasMore` and `nextOffset`
 *   are only present when the snapshot was truncated.
 */
function windowSnapshot(yaml, offset = 0) {
  if (!yaml) return { text: '', truncated: false, totalChars: 0, offset: 0 };

  const total = yaml.length;
  if (total <= MAX_SNAPSHOT_CHARS) {
    return { text: yaml, truncated: false, totalChars: total, offset: 0 };
  }

  const contentBudget = MAX_SNAPSHOT_CHARS - SNAPSHOT_TAIL_CHARS - 200; // room for marker
  const tailStart = total - SNAPSHOT_TAIL_CHARS;
  const tail = yaml.slice(tailStart);

  // A NaN offset (e.g. from parsing a bad query value) used to poison every
  // computation below and return the tail only; treat it as "start".
  const numericOffset = Number(offset);
  const requested = Number.isFinite(numericOffset) ? Math.trunc(numericOffset) : 0;
  const clampedOffset = Math.min(Math.max(0, requested), tailStart);

  const chunkEnd = clampedOffset + contentBudget;
  const hasMore = chunkEnd < tailStart;

  // Stop the chunk where the tail begins: the tail is appended separately, so
  // letting the last chunk run into it would emit those characters twice.
  const chunk = yaml.slice(clampedOffset, Math.min(chunkEnd, tailStart));

  const marker = hasMore
    ? `\n[... truncated at char ${chunkEnd} of ${total}. Call snapshot with offset=${chunkEnd} to see more. Pagination links below. ...]\n`
    : '\n';

  return {
    text: chunk + marker + tail,
    truncated: true,
    totalChars: total,
    offset: clampedOffset,
    hasMore,
    nextOffset: hasMore ? chunkEnd : null,
  };
}
|
|
40
|
+
|
|
41
|
+
module.exports = { windowSnapshot, MAX_SNAPSHOT_CHARS, SNAPSHOT_TAIL_CHARS };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askjo/camofox-browser",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"license": "MIT",
|
|
@@ -26,7 +26,9 @@
|
|
|
26
26
|
"clawdbot",
|
|
27
27
|
"moltbot",
|
|
28
28
|
"playwright",
|
|
29
|
-
"firefox"
|
|
29
|
+
"firefox",
|
|
30
|
+
"youtube",
|
|
31
|
+
"transcript"
|
|
30
32
|
],
|
|
31
33
|
"engines": {
|
|
32
34
|
"node": ">=18"
|
|
@@ -63,6 +65,7 @@
|
|
|
63
65
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
64
66
|
},
|
|
65
67
|
"devDependencies": {
|
|
66
|
-
"jest": "^29.7.0"
|
|
68
|
+
"jest": "^29.7.0",
|
|
69
|
+
"pngjs": "^7.0.0"
|
|
67
70
|
}
|
|
68
71
|
}
|
package/plugin.ts
CHANGED
|
@@ -37,7 +37,7 @@ interface PluginConfig {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
interface ToolResult {
|
|
40
|
-
content: Array<{ type: string; text
|
|
40
|
+
content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>;
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
interface HealthCheckResult {
|
|
@@ -238,19 +238,30 @@ export default function register(api: PluginApi) {
|
|
|
238
238
|
// Tool: fetch the accessibility snapshot (plus screenshot) for a tab.
api.registerTool((ctx: ToolContext) => ({
  name: "camofox_snapshot",
  description:
    "Get accessibility snapshot of a Camoufox page with element refs (e1, e2, etc.) for interaction, plus a visual screenshot. " +
    "Large pages are truncated with pagination links preserved at the bottom. " +
    "If the response includes hasMore=true and nextOffset, call again with that offset to see more content.",
  parameters: {
    type: "object",
    properties: {
      tabId: { type: "string", description: "Tab identifier" },
      offset: { type: "number", description: "Character offset for paginated snapshots. Use nextOffset from a previous truncated response." },
    },
    required: ["tabId"],
  },
  async execute(_id, params) {
    const { tabId, offset } = params as { tabId: string; offset?: number };
    const userId = ctx.agentId || fallbackUserId;

    // Encode every interpolated value so ids containing '&', '#', '/' or
    // spaces cannot alter the request path or inject extra query parameters.
    // A missing or zero offset is omitted; the server then uses its default.
    const qs = offset ? `&offset=${encodeURIComponent(String(offset))}` : '';
    const path =
      `/tabs/${encodeURIComponent(tabId)}/snapshot` +
      `?userId=${encodeURIComponent(String(userId))}&includeScreenshot=true${qs}`;
    const result = await fetchApi(baseUrl, path) as Record<string, unknown>;

    // The text part carries the snapshot and the pagination fields.
    const content: ToolResult["content"] = [
      { type: "text", text: JSON.stringify({ url: result.url, refsCount: result.refsCount, snapshot: result.snapshot, truncated: result.truncated, totalChars: result.totalChars, hasMore: result.hasMore, nextOffset: result.nextOffset }, null, 2) },
    ];

    // The screenshot is attached as a separate image part only when present.
    const screenshot = result.screenshot as { data?: string; mimeType?: string } | undefined;
    if (screenshot?.data) {
      content.push({ type: "image", data: screenshot.data, mimeType: screenshot.mimeType || "image/png" });
    }
    return { content };
  },
}));
|
|
256
267
|
|
package/server.js
CHANGED
|
@@ -5,6 +5,7 @@ const crypto = require('crypto');
|
|
|
5
5
|
const os = require('os');
|
|
6
6
|
const { expandMacro } = require('./lib/macros');
|
|
7
7
|
const { loadConfig } = require('./lib/config');
|
|
8
|
+
const { windowSnapshot } = require('./lib/snapshot');
|
|
8
9
|
|
|
9
10
|
const CONFIG = loadConfig();
|
|
10
11
|
|
|
@@ -175,9 +176,14 @@ const SESSION_TIMEOUT_MS = parseInt(process.env.SESSION_TIMEOUT_MS) || 1800000;
|
|
|
175
176
|
const MAX_SNAPSHOT_NODES = 500;
|
|
176
177
|
const MAX_SESSIONS = parseInt(process.env.MAX_SESSIONS) || 50;
|
|
177
178
|
const MAX_TABS_PER_SESSION = parseInt(process.env.MAX_TABS_PER_SESSION) || 10;
|
|
179
|
+
const MAX_TABS_GLOBAL = parseInt(process.env.MAX_TABS_GLOBAL) || 10;
|
|
178
180
|
const HANDLER_TIMEOUT_MS = parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000;
|
|
179
181
|
const MAX_CONCURRENT_PER_USER = parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3;
|
|
180
182
|
const PAGE_CLOSE_TIMEOUT_MS = 5000;
|
|
183
|
+
const NAVIGATE_TIMEOUT_MS = parseInt(process.env.NAVIGATE_TIMEOUT_MS) || 25000;
|
|
184
|
+
const BUILDREFS_TIMEOUT_MS = parseInt(process.env.BUILDREFS_TIMEOUT_MS) || 12000;
|
|
185
|
+
const FAILURE_THRESHOLD = 3;
|
|
186
|
+
const TAB_LOCK_TIMEOUT_MS = 30000;
|
|
181
187
|
|
|
182
188
|
// Per-tab locks to serialize operations on the same tab
|
|
183
189
|
// tabId -> Promise (the currently executing operation)
|
|
@@ -188,9 +194,14 @@ async function withTabLock(tabId, operation) {
|
|
|
188
194
|
const pending = tabLocks.get(tabId);
|
|
189
195
|
if (pending) {
|
|
190
196
|
try {
|
|
191
|
-
await
|
|
197
|
+
await Promise.race([
|
|
198
|
+
pending,
|
|
199
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Tab lock timeout')), TAB_LOCK_TIMEOUT_MS))
|
|
200
|
+
]);
|
|
192
201
|
} catch (e) {
|
|
193
|
-
|
|
202
|
+
if (e.message === 'Tab lock timeout') {
|
|
203
|
+
log('warn', 'tab lock timeout, proceeding', { tabId });
|
|
204
|
+
}
|
|
194
205
|
}
|
|
195
206
|
}
|
|
196
207
|
|
|
@@ -233,9 +244,13 @@ async function withUserLimit(userId, operation) {
|
|
|
233
244
|
});
|
|
234
245
|
}
|
|
235
246
|
state.active++;
|
|
247
|
+
healthState.activeOps++;
|
|
236
248
|
try {
|
|
237
|
-
|
|
249
|
+
const result = await operation();
|
|
250
|
+
healthState.lastSuccessfulNav = Date.now();
|
|
251
|
+
return result;
|
|
238
252
|
} finally {
|
|
253
|
+
healthState.activeOps--;
|
|
239
254
|
state.active--;
|
|
240
255
|
if (state.queue.length > 0) {
|
|
241
256
|
const next = state.queue.shift();
|
|
@@ -307,6 +322,59 @@ function clearBrowserIdleTimer() {
|
|
|
307
322
|
}
|
|
308
323
|
}
|
|
309
324
|
|
|
325
|
+
// --- Browser health tracking ---
// Shared mutable counters describing browser health.
const healthState = {
  // Incremented by recordNavFailure(); reset by recordNavSuccess() and after a restart.
  consecutiveNavFailures: 0,
  // Timestamp (ms since epoch) of the last recorded success; starts at module load.
  lastSuccessfulNav: Date.now(),
  // True while restartBrowser() is running.
  isRecovering: false,
  // Number of operations currently in flight.
  activeOps: 0,
};
|
|
332
|
+
|
|
333
|
+
/**
 * Record a successful navigation: clears the consecutive-failure counter and
 * stamps the current time as the last success.
 */
function recordNavSuccess() {
  healthState.consecutiveNavFailures = 0;
  healthState.lastSuccessfulNav = Date.now();
}
|
|
337
|
+
|
|
338
|
+
/**
 * Record a failed navigation.
 *
 * @returns {boolean} true once the number of consecutive failures has reached
 *   FAILURE_THRESHOLD.
 */
function recordNavFailure() {
  healthState.consecutiveNavFailures++;
  return healthState.consecutiveNavFailures >= FAILURE_THRESHOLD;
}
|
|
342
|
+
|
|
343
|
+
/**
 * Tear down every session and the browser, then launch a fresh browser.
 *
 * Calls made while a restart is already in progress return immediately.
 * A failed restart is logged rather than thrown; `isRecovering` is always
 * cleared on exit.
 *
 * @param {string} reason - Included in the log entry for the restart.
 * @returns {Promise<void>}
 */
async function restartBrowser(reason) {
  if (healthState.isRecovering) return;
  healthState.isRecovering = true;
  log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });
  try {
    // Best-effort close: a context that fails to close must not stop the restart.
    for (const [, session] of sessions) {
      await session.context.close().catch(() => {});
    }
    sessions.clear();

    if (browser) {
      await browser.close().catch(() => {});
      browser = null;
    }
    // Drop the cached launch promise before asking for a browser again.
    browserLaunchPromise = null;
    await ensureBrowser();

    healthState.consecutiveNavFailures = 0;
    healthState.lastSuccessfulNav = Date.now();
    log('info', 'browser restarted successfully');
  } catch (err) {
    log('error', 'browser restart failed', { error: err.message });
  } finally {
    healthState.isRecovering = false;
  }
}
|
|
367
|
+
|
|
368
|
+
/**
 * Count the tabs across all sessions by summing the size of every tab group.
 *
 * @returns {number} Total number of tabs currently tracked.
 */
function getTotalTabCount() {
  let total = 0;
  for (const session of sessions.values()) {
    for (const group of session.tabGroups.values()) {
      total += group.size;
    }
  }
  return total;
}
|
|
377
|
+
|
|
310
378
|
async function launchBrowserInstance() {
|
|
311
379
|
const hostOS = getHostOS();
|
|
312
380
|
const proxy = buildProxyConfig();
|
|
@@ -406,7 +474,8 @@ function createTabState(page) {
|
|
|
406
474
|
page,
|
|
407
475
|
refs: new Map(),
|
|
408
476
|
visitedUrls: new Set(),
|
|
409
|
-
toolCalls: 0
|
|
477
|
+
toolCalls: 0,
|
|
478
|
+
lastSnapshot: null,
|
|
410
479
|
};
|
|
411
480
|
}
|
|
412
481
|
|
|
@@ -507,19 +576,47 @@ async function buildRefs(page) {
|
|
|
507
576
|
return refs;
|
|
508
577
|
}
|
|
509
578
|
|
|
579
|
+
const start = Date.now();
|
|
580
|
+
|
|
581
|
+
// Hard total timeout on the entire buildRefs operation
|
|
582
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
583
|
+
setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS)
|
|
584
|
+
);
|
|
585
|
+
|
|
586
|
+
try {
|
|
587
|
+
return await Promise.race([
|
|
588
|
+
_buildRefsInner(page, refs, start),
|
|
589
|
+
timeoutPromise
|
|
590
|
+
]);
|
|
591
|
+
} catch (err) {
|
|
592
|
+
if (err.message === 'buildRefs_timeout') {
|
|
593
|
+
log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
|
|
594
|
+
return refs;
|
|
595
|
+
}
|
|
596
|
+
throw err;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
async function _buildRefsInner(page, refs, start) {
|
|
510
601
|
await waitForPageReady(page, { waitForNetwork: false });
|
|
511
602
|
|
|
512
|
-
//
|
|
513
|
-
|
|
514
|
-
|
|
603
|
+
// Budget remaining time for ariaSnapshot
|
|
604
|
+
const elapsed = Date.now() - start;
|
|
605
|
+
const remaining = BUILDREFS_TIMEOUT_MS - elapsed;
|
|
606
|
+
if (remaining < 2000) {
|
|
607
|
+
log('warn', 'buildRefs: insufficient time for ariaSnapshot', { elapsed });
|
|
608
|
+
return refs;
|
|
609
|
+
}
|
|
610
|
+
|
|
515
611
|
let ariaYaml;
|
|
516
612
|
try {
|
|
517
|
-
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: 5000 });
|
|
613
|
+
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: Math.min(remaining - 1000, 5000) });
|
|
518
614
|
} catch (err) {
|
|
519
615
|
log('warn', 'ariaSnapshot failed, retrying');
|
|
616
|
+
const retryBudget = BUILDREFS_TIMEOUT_MS - (Date.now() - start);
|
|
617
|
+
if (retryBudget < 2000) return refs;
|
|
520
618
|
try {
|
|
521
|
-
await page.
|
|
522
|
-
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: 5000 });
|
|
619
|
+
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: Math.min(retryBudget - 500, 5000) });
|
|
523
620
|
} catch (retryErr) {
|
|
524
621
|
log('warn', 'ariaSnapshot retry failed, returning empty refs', { error: retryErr.message });
|
|
525
622
|
return refs;
|
|
@@ -593,15 +690,314 @@ function refToLocator(page, ref, refs) {
|
|
|
593
690
|
return locator;
|
|
594
691
|
}
|
|
595
692
|
|
|
596
|
-
//
|
|
693
|
+
// --- YouTube transcript extraction via yt-dlp ---
|
|
694
|
+
// POST /youtube/transcript { url, languages? }
|
|
695
|
+
// Uses yt-dlp to extract subtitles — no browser needed, no ads, no playback.
|
|
696
|
+
// yt-dlp handles YouTube's signed caption URLs correctly.
|
|
697
|
+
// Falls back to Camoufox page intercept if yt-dlp is not installed.
|
|
698
|
+
|
|
699
|
+
const { execFile } = require('child_process');
|
|
700
|
+
const { mkdtemp, readFile, readdir, rm } = require('fs/promises');
|
|
701
|
+
const { tmpdir } = require('os');
|
|
702
|
+
const { join } = require('path');
|
|
703
|
+
|
|
704
|
+
// Detect yt-dlp binary at startup.
// ytDlpPath stays null until the first candidate answers `--version`; if none
// does, it remains null and the transcript endpoint uses the browser fallback.
let ytDlpPath = null;
// Fire-and-forget: detection runs in the background and its promise is
// intentionally not awaited.
void (async () => {
  for (const candidate of ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp']) {
    try {
      await new Promise((resolve, reject) => {
        execFile(candidate, ['--version'], { timeout: 5000 }, (err, stdout) => {
          if (err) return reject(err);
          resolve(stdout.trim());
        });
      });
      ytDlpPath = candidate;
      log('info', 'yt-dlp found', { path: candidate });
      break;
    } catch {
      // This candidate is missing or not runnable; try the next one.
    }
  }
  if (!ytDlpPath) log('warn', 'yt-dlp not found — YouTube transcript endpoint will use browser fallback');
})();
|
|
722
|
+
|
|
723
|
+
/**
 * POST /youtube/transcript { url, languages? }
 *
 * Validates the URL, extracts the 11-character video ID and delegates to
 * ytDlpTranscript when a yt-dlp binary was detected, otherwise to
 * browserTranscript. The strategy's result object is returned as JSON.
 * Responds 400 for a missing/invalid URL and 500 on unexpected errors.
 */
app.post('/youtube/transcript', async (req, res) => {
  const reqId = req.reqId;
  try {
    // Tolerate a missing body so the client gets the 400 below, not a 500.
    const { url, languages } = req.body ?? {};
    if (!url) return res.status(400).json({ error: 'url is required' });

    const urlErr = validateUrl(url);
    if (urlErr) return res.status(400).json({ error: urlErr });

    // The ID pattern below is not anchored to the host, so a URL on another
    // site that merely contains "youtube.com/watch?v=..." would match and be
    // handed to yt-dlp / the browser. Require an actual YouTube host first.
    let host = '';
    try {
      host = new URL(url).hostname.toLowerCase();
    } catch {
      // Unparseable URL: host stays empty and is rejected below.
    }
    const isYouTubeHost = host === 'youtu.be' || host === 'youtube.com' || host.endsWith('.youtube.com');

    const videoIdMatch = isYouTubeHost && url.match(
      /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/shorts\/)([a-zA-Z0-9_-]{11})/
    );
    if (!videoIdMatch) {
      return res.status(400).json({ error: 'Could not extract YouTube video ID from URL' });
    }
    const videoId = videoIdMatch[1];

    // Accept either an array (first entry wins) or a bare string. Indexing a
    // string with [0] would otherwise yield its first character ("e" for "en").
    // Anything missing, empty or non-string falls back to English.
    const requestedLang = Array.isArray(languages) ? languages[0] : languages;
    const lang = typeof requestedLang === 'string' && requestedLang.trim() ? requestedLang.trim() : 'en';

    log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: ytDlpPath ? 'yt-dlp' : 'browser' });

    let result;
    if (ytDlpPath) {
      result = await ytDlpTranscript(reqId, url, videoId, lang);
    } else {
      result = await browserTranscript(reqId, url, videoId, lang);
    }

    log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
    res.json(result);
  } catch (err) {
    log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
    res.status(500).json({ error: safeError(err) });
  }
});
|
|
757
|
+
|
|
758
|
+
// Strategy 1: yt-dlp (preferred — fast, no browser, no ads)
/**
 * Fetch a transcript by running yt-dlp twice: once to print the title, once
 * to write subtitle files into a temporary directory that is removed
 * afterwards.
 *
 * @param {string} reqId - Request id (not used inside this function).
 * @param {string} url - Video URL passed to yt-dlp.
 * @param {string} videoId - Video id echoed back in the result.
 * @param {string} lang - Value passed to yt-dlp's `--sub-lang`.
 * @returns {Promise<object>} `{ status: 'ok', transcript, ... }` on success, or
 *   `{ status: 'error', code: 404, message, ... }` when no usable captions were produced.
 * @throws {Error} When either yt-dlp invocation fails or times out.
 */
async function ytDlpTranscript(reqId, url, videoId, lang) {
  const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
  try {
    // Step 1: Get title via --print (fast, no download)
    const title = await new Promise((resolve, reject) => {
      execFile(
        ytDlpPath,
        ['--skip-download', '--no-warnings', '--print', '%(title)s', url],
        { timeout: 15000 },
        (err, stdout) => {
          if (err) return reject(new Error(`yt-dlp metadata failed: ${err.message}`, { cause: err }));
          resolve(stdout.trim().split('\n')[0] || '');
        }
      );
    });

    // Step 2: Download subtitles to temp dir. This runs after step 1 rather
    // than in parallel, so the temp dir is never removed while yt-dlp is still writing.
    await new Promise((resolve, reject) => {
      execFile(
        ytDlpPath,
        [
          '--skip-download',
          '--write-sub', '--write-auto-sub',
          '--sub-lang', lang,
          '--sub-format', 'json3',
          '-o', join(tmpDir, '%(id)s'),
          url,
        ],
        { timeout: 30000 },
        (err, stdout, stderr) => {
          if (err) return reject(new Error(`yt-dlp subtitle download failed: ${err.message}\n${stderr}`, { cause: err }));
          resolve();
        }
      );
    });

    // Find the subtitle file
    const files = await readdir(tmpDir);
    const subFile = files.find(f => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
    if (!subFile) {
      return {
        status: 'error', code: 404,
        message: 'No captions available for this video',
        video_url: url, video_id: videoId, title,
      };
    }

    const content = await readFile(join(tmpDir, subFile), 'utf8');

    // Pick the parser from the file extension; .srv3 goes to the XML parser.
    let transcriptText = null;
    if (subFile.endsWith('.json3')) {
      transcriptText = parseJson3(content);
    } else if (subFile.endsWith('.vtt')) {
      transcriptText = parseVtt(content);
    } else {
      transcriptText = parseXml(content);
    }

    if (!transcriptText || !transcriptText.trim()) {
      return {
        status: 'error', code: 404,
        message: 'Subtitle file found but content was empty',
        video_url: url, video_id: videoId, title,
      };
    }

    // Detect language from filename (e.g., dQw4w9WgXcQ.en.json3)
    const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);

    return {
      status: 'ok', transcript: transcriptText,
      video_url: url, video_id: videoId, video_title: title,
      language: langMatch?.[1] || lang,
      total_words: transcriptText.split(/\s+/).length,
    };
  } finally {
    // Best-effort cleanup; a failed removal must not mask the real outcome.
    await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
830
|
+
|
|
831
|
+
// Strategy 2: Browser fallback — play video, intercept timedtext network response
/**
 * Fetch a transcript by opening the video in a browser page, starting muted
 * playback and capturing the first non-empty `/api/timedtext` response whose
 * URL contains `v=<videoId>`.
 *
 * Runs under withUserLimit() with the fixed user id '__yt_transcript__' and
 * always closes the page it opened.
 *
 * @param {string} reqId - Request id used in log entries.
 * @param {string} url - Video URL to navigate to.
 * @param {string} videoId - Video id used to filter caption responses.
 * @param {string} lang - Echoed back as `language` in the result.
 *   NOTE(review): `lang` is not used to select a caption track here, so the
 *   reported language may differ from the captions actually loaded — confirm.
 * @returns {Promise<object>} `{ status: 'ok', transcript, ... }` on success, or
 *   `{ status: 'error', code: 404, message, ... }` when no captions were captured or parsed.
 */
async function browserTranscript(reqId, url, videoId, lang) {
  return await withUserLimit('__yt_transcript__', async () => {
    await ensureBrowser();
    const session = await getSession('__yt_transcript__');
    const page = await session.context.newPage();

    try {
      // Mute audio: force every play() call to mute first.
      await page.addInitScript(() => {
        const origPlay = HTMLMediaElement.prototype.play;
        HTMLMediaElement.prototype.play = function() { this.volume = 0; this.muted = true; return origPlay.call(this); };
      });

      // Intercept timedtext responses — filter by video ID to skip ad captions
      let interceptedCaptions = null;
      page.on('response', async (response) => {
        const respUrl = response.url();
        if (respUrl.includes('/api/timedtext') && respUrl.includes(`v=${videoId}`) && !interceptedCaptions) {
          try {
            const body = await response.text();
            if (body && body.length > 0) interceptedCaptions = body;
          } catch {
            // Body could not be read; keep waiting for another response.
          }
        }
      });

      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATE_TIMEOUT_MS });
      await page.waitForTimeout(2000);

      // Extract metadata from ytInitialPlayerResponse
      const meta = await page.evaluate(() => {
        const r = window.ytInitialPlayerResponse || (typeof ytInitialPlayerResponse !== 'undefined' ? ytInitialPlayerResponse : null);
        if (!r) return { title: '' };
        const tracks = r?.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
        return {
          title: r?.videoDetails?.title || '',
          languages: tracks.map(t => ({ code: t.languageCode, name: t.name?.simpleText || t.languageCode, kind: t.kind || 'manual' })),
        };
      });

      // Start playback to trigger caption loading
      await page.evaluate(() => {
        const v = document.querySelector('video');
        if (v) { v.muted = true; v.play().catch(() => {}); }
      }).catch(() => {});

      // Wait up to 20s for the target video's captions (may need to sit through an ad)
      for (let i = 0; i < 40 && !interceptedCaptions; i++) {
        await page.waitForTimeout(500);
      }

      if (!interceptedCaptions) {
        return {
          status: 'error', code: 404,
          message: 'No captions loaded during playback (video may have no captions, or ad blocked it)',
          video_url: url, video_id: videoId, title: meta.title,
        };
      }

      log('info', 'youtube transcript: intercepted captions', { reqId, len: interceptedCaptions.length });

      // Sniff the payload format from its content.
      let transcriptText = null;
      if (interceptedCaptions.trimStart().startsWith('{')) transcriptText = parseJson3(interceptedCaptions);
      else if (interceptedCaptions.includes('WEBVTT')) transcriptText = parseVtt(interceptedCaptions);
      else if (interceptedCaptions.includes('<text')) transcriptText = parseXml(interceptedCaptions);

      if (!transcriptText || !transcriptText.trim()) {
        return {
          status: 'error', code: 404,
          message: 'Caption data intercepted but could not be parsed',
          video_url: url, video_id: videoId, title: meta.title,
        };
      }

      return {
        status: 'ok', transcript: transcriptText,
        video_url: url, video_id: videoId, video_title: meta.title,
        language: lang, total_words: transcriptText.split(/\s+/).length,
        available_languages: meta.languages,
      };
    } finally {
      await safePageClose(page);
    }
  });
}
|
|
916
|
+
|
|
917
|
+
// --- YouTube transcript parsers ---
|
|
918
|
+
|
|
919
|
+
/**
 * Convert a json3 caption payload into timestamped transcript lines.
 *
 * Each entry of `events` that has non-empty text (its `segs[].utf8` values
 * concatenated and trimmed) becomes one `[MM:SS] text` line, where the time
 * comes from `tStartMs`. Minutes are not wrapped at 60, so a cue at 1h02m05s
 * is rendered as `[62:05]`.
 *
 * @param {string} content - Raw JSON text.
 * @returns {string|null} Lines joined with '\n' ('' when there are no cues),
 *   or null when the payload cannot be parsed or has an unexpected shape.
 */
function parseJson3(content) {
  try {
    const data = JSON.parse(content);
    const lines = [];
    for (const event of data.events || []) {
      // Null events/segments are skipped instead of discarding the whole transcript.
      const segs = event?.segs || [];
      if (!segs.length) continue;
      const text = segs.map((seg) => seg?.utf8 || '').join('').trim();
      if (!text) continue;

      const totalSec = Math.floor((event.tStartMs || 0) / 1000);
      const mm = String(Math.floor(totalSec / 60)).padStart(2, '0');
      const ss = String(totalSec % 60).padStart(2, '0');
      lines.push(`[${mm}:${ss}] ${text}`);
    }
    return lines.join('\n');
  } catch {
    // Invalid JSON or an unexpected structure: callers treat null as "unparseable".
    return null;
  }
}
|
|
940
|
+
|
|
941
|
+
/**
 * Decode the five HTML entities handled by the caption parsers.
 * `&amp;` is decoded first, so doubly-escaped text such as `&amp;#39;`
 * resolves to the final character.
 */
function decodeCaptionEntities(text) {
  return text
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'");
}

/**
 * Convert WebVTT text into transcript lines.
 *
 * Header lines (WEBVTT, Kind:, Language:, NOTE...) and blank lines are
 * skipped. A cue timing line sets the pending timestamp; the first text line
 * after it is emitted as `[MM:SS] text`, and further text lines of the same
 * cue are emitted without a timestamp. Inline tags are stripped and entities decoded.
 *
 * @param {string} content - Raw WebVTT text.
 * @returns {string} Transcript lines joined with '\n'.
 */
function parseVtt(content) {
  const result = [];
  let currentTimestamp = '';
  for (const line of content.split('\n')) {
    const stripped = line.trim();
    if (!stripped || stripped === 'WEBVTT' || stripped.startsWith('Kind:') || stripped.startsWith('Language:') || stripped.startsWith('NOTE')) continue;

    if (stripped.includes(' --> ')) {
      // Only the start time (left of the arrow) is used.
      const parts = stripped.split(' --> ');
      if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
      continue;
    }

    const text = decodeCaptionEntities(stripped.replace(/<[^>]+>/g, '')).trim();
    if (text && currentTimestamp) {
      result.push(`[${currentTimestamp}] ${text}`);
      currentTimestamp = '';
    } else if (text) {
      result.push(text);
    }
  }
  return result.join('\n');
}

/**
 * Convert caption XML (`<text start="...">...</text>` elements) into
 * `[MM:SS] text` lines. Elements whose text is empty after tag stripping and
 * entity decoding are skipped.
 *
 * @param {string} content - Raw XML text.
 * @returns {string} Transcript lines joined with '\n' ('' when nothing matched).
 */
function parseXml(content) {
  const lines = [];
  const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
  let match;
  while ((match = regex.exec(content)) !== null) {
    const startSec = parseFloat(match[1]) || 0;
    const text = decodeCaptionEntities(match[2].replace(/<[^>]+>/g, '')).trim();
    if (!text) continue;
    const mm = Math.floor(startSec / 60);
    const ss = Math.floor(startSec % 60);
    lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
  }
  return lines.join('\n');
}

/**
 * Shorten a WebVTT timestamp to `MM:SS`.
 *
 * `HH:MM:SS.mmm` folds the hours into the minutes (01:02:03 -> 62:03);
 * `MM:SS.mmm` drops the fraction. Anything without a ':' is returned unchanged.
 *
 * @param {string} ts - Timestamp text.
 * @returns {string} Shortened timestamp.
 */
function formatVttTs(ts) {
  const parts = ts.split(':');
  if (parts.length >= 3) {
    const hours = parseInt(parts[0], 10) || 0;
    const minutes = parseInt(parts[1], 10) || 0;
    const totalMin = hours * 60 + minutes;
    const seconds = (parts[2] || '00').split('.')[0];
    return `${String(totalMin).padStart(2, '0')}:${seconds}`;
  } else if (parts.length === 2) {
    return `${String(parseInt(parts[0], 10)).padStart(2, '0')}:${(parts[1] || '00').split('.')[0]}`;
  }
  return ts;
}
|
|
988
|
+
|
|
597
989
|
/**
 * GET /health
 *
 * Responds 503 with `recovering: true` while a browser restart is in
 * progress. Otherwise reports whether the browser is connected, the total tab
 * count and the current consecutive navigation-failure count.
 */
app.get('/health', (req, res) => {
  if (healthState.isRecovering) {
    return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
  }
  // `!= null` also covers an undefined browser handle.
  const running = browser != null && (browser.isConnected?.() ?? false);
  res.json({
    ok: true,
    engine: 'camoufox',
    browserConnected: running,
    browserRunning: running,
    activeTabs: getTotalTabCount(),
    consecutiveFailures: healthState.consecutiveNavFailures,
  });
});
|
|
607
1003
|
|
|
@@ -658,23 +1054,46 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
658
1054
|
let session = sessions.get(normalizeUserId(userId));
|
|
659
1055
|
let found = session && findTab(session, tabId);
|
|
660
1056
|
|
|
1057
|
+
let tabState;
|
|
661
1058
|
if (!found) {
|
|
662
1059
|
const resolvedSessionKey = sessionKey || listItemId || 'default';
|
|
663
1060
|
session = await getSession(userId);
|
|
664
|
-
let
|
|
665
|
-
for (const g of session.tabGroups.values())
|
|
666
|
-
if (
|
|
667
|
-
|
|
1061
|
+
let sessionTabs = 0;
|
|
1062
|
+
for (const g of session.tabGroups.values()) sessionTabs += g.size;
|
|
1063
|
+
if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
|
|
1064
|
+
// Reuse oldest tab in session instead of rejecting
|
|
1065
|
+
let oldestTab = null;
|
|
1066
|
+
let oldestGroup = null;
|
|
1067
|
+
let oldestTabId = null;
|
|
1068
|
+
for (const [gKey, group] of session.tabGroups) {
|
|
1069
|
+
for (const [tid, ts] of group) {
|
|
1070
|
+
if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
|
|
1071
|
+
oldestTab = ts;
|
|
1072
|
+
oldestGroup = group;
|
|
1073
|
+
oldestTabId = tid;
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
if (oldestTab) {
|
|
1078
|
+
tabState = oldestTab;
|
|
1079
|
+
const group = getTabGroup(session, resolvedSessionKey);
|
|
1080
|
+
if (oldestGroup) oldestGroup.delete(oldestTabId);
|
|
1081
|
+
group.set(tabId, tabState);
|
|
1082
|
+
tabLocks.delete(oldestTabId);
|
|
1083
|
+
log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
|
|
1084
|
+
} else {
|
|
1085
|
+
throw new Error('Maximum tabs per session reached');
|
|
1086
|
+
}
|
|
1087
|
+
} else {
|
|
1088
|
+
const page = await session.context.newPage();
|
|
1089
|
+
tabState = createTabState(page);
|
|
1090
|
+
const group = getTabGroup(session, resolvedSessionKey);
|
|
1091
|
+
group.set(tabId, tabState);
|
|
1092
|
+
log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
|
|
668
1093
|
}
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
const group = getTabGroup(session, resolvedSessionKey);
|
|
672
|
-
group.set(tabId, newTabState);
|
|
673
|
-
found = { tabState: newTabState, listItemId: resolvedSessionKey, group };
|
|
674
|
-
log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
|
|
1094
|
+
} else {
|
|
1095
|
+
tabState = found.tabState;
|
|
675
1096
|
}
|
|
676
|
-
|
|
677
|
-
const { tabState } = found;
|
|
678
1097
|
tabState.toolCalls++;
|
|
679
1098
|
|
|
680
1099
|
let targetUrl = url;
|
|
@@ -690,8 +1109,9 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
690
1109
|
return await withTabLock(tabId, async () => {
|
|
691
1110
|
await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
692
1111
|
tabState.visitedUrls.add(targetUrl);
|
|
1112
|
+
tabState.lastSnapshot = null;
|
|
693
1113
|
tabState.refs = await buildRefs(tabState.page);
|
|
694
|
-
return { ok: true, tabId, url: tabState.page.url() };
|
|
1114
|
+
return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
|
|
695
1115
|
});
|
|
696
1116
|
})(), HANDLER_TIMEOUT_MS, 'navigate'));
|
|
697
1117
|
|
|
@@ -699,7 +1119,8 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
699
1119
|
res.json(result);
|
|
700
1120
|
} catch (err) {
|
|
701
1121
|
log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
|
|
702
|
-
|
|
1122
|
+
const status = err.message && err.message.startsWith('Blocked URL scheme') ? 400 : 500;
|
|
1123
|
+
res.status(status).json({ error: safeError(err) });
|
|
703
1124
|
}
|
|
704
1125
|
});
|
|
705
1126
|
|
|
@@ -709,6 +1130,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
709
1130
|
const userId = req.query.userId;
|
|
710
1131
|
if (!userId) return res.status(400).json({ error: 'userId required' });
|
|
711
1132
|
const format = req.query.format || 'text';
|
|
1133
|
+
const offset = parseInt(req.query.offset) || 0;
|
|
712
1134
|
const session = sessions.get(normalizeUserId(userId));
|
|
713
1135
|
const found = session && findTab(session, req.params.tabId);
|
|
714
1136
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
@@ -716,6 +1138,18 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
716
1138
|
const { tabState } = found;
|
|
717
1139
|
tabState.toolCalls++;
|
|
718
1140
|
|
|
1141
|
+
// Cached chunk retrieval for offset>0 requests
|
|
1142
|
+
if (offset > 0 && tabState.lastSnapshot) {
|
|
1143
|
+
const win = windowSnapshot(tabState.lastSnapshot, offset);
|
|
1144
|
+
const response = { url: tabState.page.url(), snapshot: win.text, refsCount: tabState.refs.size, truncated: win.truncated, totalChars: win.totalChars, hasMore: win.hasMore, nextOffset: win.nextOffset };
|
|
1145
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1146
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1147
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1148
|
+
}
|
|
1149
|
+
log('info', 'snapshot (cached offset)', { reqId: req.reqId, tabId: req.params.tabId, offset, totalChars: win.totalChars });
|
|
1150
|
+
return res.json(response);
|
|
1151
|
+
}
|
|
1152
|
+
|
|
719
1153
|
const result = await withUserLimit(userId, () => withTimeout((async () => {
|
|
720
1154
|
tabState.refs = await buildRefs(tabState.page);
|
|
721
1155
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
@@ -754,14 +1188,28 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
754
1188
|
}).join('\n');
|
|
755
1189
|
}
|
|
756
1190
|
|
|
757
|
-
|
|
1191
|
+
tabState.lastSnapshot = annotatedYaml;
|
|
1192
|
+
const win = windowSnapshot(annotatedYaml, 0);
|
|
1193
|
+
|
|
1194
|
+
const response = {
|
|
758
1195
|
url: tabState.page.url(),
|
|
759
|
-
snapshot:
|
|
760
|
-
refsCount: tabState.refs.size
|
|
1196
|
+
snapshot: win.text,
|
|
1197
|
+
refsCount: tabState.refs.size,
|
|
1198
|
+
truncated: win.truncated,
|
|
1199
|
+
totalChars: win.totalChars,
|
|
1200
|
+
hasMore: win.hasMore,
|
|
1201
|
+
nextOffset: win.nextOffset,
|
|
761
1202
|
};
|
|
1203
|
+
|
|
1204
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1205
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1206
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1209
|
+
return response;
|
|
762
1210
|
})(), HANDLER_TIMEOUT_MS, 'snapshot'));
|
|
763
1211
|
|
|
764
|
-
log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount });
|
|
1212
|
+
log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
|
|
765
1213
|
res.json(result);
|
|
766
1214
|
} catch (err) {
|
|
767
1215
|
log('error', 'snapshot failed', { reqId: req.reqId, tabId: req.params.tabId, error: err.message });
|
|
@@ -844,7 +1292,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
844
1292
|
log('warn', 'force click failed, trying mouse sequence');
|
|
845
1293
|
await dispatchMouseSequence(locator);
|
|
846
1294
|
}
|
|
847
|
-
} else if (err.message.includes('not visible') || err.message.includes('timeout')) {
|
|
1295
|
+
} else if (err.message.includes('not visible') || err.message.toLowerCase().includes('timeout')) {
|
|
848
1296
|
// Fallback 2: Element not responding to click, try mouse sequence
|
|
849
1297
|
log('warn', 'click timeout, trying mouse sequence');
|
|
850
1298
|
await dispatchMouseSequence(locator);
|
|
@@ -855,7 +1303,13 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
855
1303
|
};
|
|
856
1304
|
|
|
857
1305
|
if (ref) {
|
|
858
|
-
|
|
1306
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1307
|
+
if (!locator && tabState.refs.size === 0) {
|
|
1308
|
+
// Auto-refresh refs on stale state before failing
|
|
1309
|
+
log('info', 'auto-refreshing stale refs before click', { ref });
|
|
1310
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
1311
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1312
|
+
}
|
|
859
1313
|
if (!locator) {
|
|
860
1314
|
const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none';
|
|
861
1315
|
throw new Error(`Unknown ref: ${ref} (valid refs: e1-${maxRef}, ${tabState.refs.size} total). Refs reset after navigation - call snapshot first.`);
|
|
@@ -866,11 +1320,12 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
866
1320
|
}
|
|
867
1321
|
|
|
868
1322
|
await tabState.page.waitForTimeout(500);
|
|
1323
|
+
tabState.lastSnapshot = null;
|
|
869
1324
|
tabState.refs = await buildRefs(tabState.page);
|
|
870
1325
|
|
|
871
1326
|
const newUrl = tabState.page.url();
|
|
872
1327
|
tabState.visitedUrls.add(newUrl);
|
|
873
|
-
return { ok: true, url: newUrl };
|
|
1328
|
+
return { ok: true, url: newUrl, refsAvailable: tabState.refs.size > 0 };
|
|
874
1329
|
}), HANDLER_TIMEOUT_MS, 'click'));
|
|
875
1330
|
|
|
876
1331
|
log('info', 'clicked', { reqId: req.reqId, tabId, url: result.url });
|
|
@@ -1215,7 +1670,6 @@ app.get('/', (req, res) => {
|
|
|
1215
1670
|
engine: 'camoufox',
|
|
1216
1671
|
browserConnected: running,
|
|
1217
1672
|
browserRunning: running,
|
|
1218
|
-
sessions: sessions.size,
|
|
1219
1673
|
});
|
|
1220
1674
|
});
|
|
1221
1675
|
|
|
@@ -1364,6 +1818,7 @@ app.post('/navigate', async (req, res) => {
|
|
|
1364
1818
|
app.get('/snapshot', async (req, res) => {
|
|
1365
1819
|
try {
|
|
1366
1820
|
const { targetId, userId, format = 'text' } = req.query;
|
|
1821
|
+
const offset = parseInt(req.query.offset) || 0;
|
|
1367
1822
|
if (!userId) {
|
|
1368
1823
|
return res.status(400).json({ error: 'userId is required' });
|
|
1369
1824
|
}
|
|
@@ -1376,6 +1831,18 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1376
1831
|
|
|
1377
1832
|
const { tabState } = found;
|
|
1378
1833
|
tabState.toolCalls++;
|
|
1834
|
+
|
|
1835
|
+
// Cached chunk retrieval
|
|
1836
|
+
if (offset > 0 && tabState.lastSnapshot) {
|
|
1837
|
+
const win = windowSnapshot(tabState.lastSnapshot, offset);
|
|
1838
|
+
const response = { ok: true, format: 'aria', targetId, url: tabState.page.url(), snapshot: win.text, refsCount: tabState.refs.size, truncated: win.truncated, totalChars: win.totalChars, hasMore: win.hasMore, nextOffset: win.nextOffset };
|
|
1839
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1840
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1841
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1842
|
+
}
|
|
1843
|
+
return res.json(response);
|
|
1844
|
+
}
|
|
1845
|
+
|
|
1379
1846
|
tabState.refs = await buildRefs(tabState.page);
|
|
1380
1847
|
|
|
1381
1848
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
@@ -1404,14 +1871,28 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1404
1871
|
}).join('\n');
|
|
1405
1872
|
}
|
|
1406
1873
|
|
|
1407
|
-
|
|
1874
|
+
tabState.lastSnapshot = annotatedYaml;
|
|
1875
|
+
const win = windowSnapshot(annotatedYaml, 0);
|
|
1876
|
+
|
|
1877
|
+
const response = {
|
|
1408
1878
|
ok: true,
|
|
1409
1879
|
format: 'aria',
|
|
1410
1880
|
targetId,
|
|
1411
1881
|
url: tabState.page.url(),
|
|
1412
|
-
snapshot:
|
|
1413
|
-
refsCount: tabState.refs.size
|
|
1414
|
-
|
|
1882
|
+
snapshot: win.text,
|
|
1883
|
+
refsCount: tabState.refs.size,
|
|
1884
|
+
truncated: win.truncated,
|
|
1885
|
+
totalChars: win.totalChars,
|
|
1886
|
+
hasMore: win.hasMore,
|
|
1887
|
+
nextOffset: win.nextOffset,
|
|
1888
|
+
};
|
|
1889
|
+
|
|
1890
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1891
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1892
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1895
|
+
res.json(response);
|
|
1415
1896
|
} catch (err) {
|
|
1416
1897
|
log('error', 'openclaw snapshot failed', { reqId: req.reqId, error: err.message });
|
|
1417
1898
|
res.status(500).json({ error: safeError(err) });
|
|
@@ -1584,6 +2065,32 @@ setInterval(() => {
|
|
|
1584
2065
|
});
|
|
1585
2066
|
}, 5 * 60_000);
|
|
1586
2067
|
|
|
2068
|
+
// Active health probe — detect hung browser even when isConnected() lies.
//
// Every 60s, if no browser exists, a recovery is already running, operations
// are in flight, or a navigation succeeded within the last 120s, do nothing.
// Otherwise open a throwaway context/page, load about:blank, and restart the
// browser if that fails.
//
// The whole probe runs under a single deadline: newContext()/newPage()/close()
// are awaited without a timeout of their own, so against a hung browser they
// could otherwise stall forever and the restart would never be triggered.
setInterval(async () => {
  if (!browser || healthState.isRecovering) return;
  // Skip probe if operations are in flight
  if (healthState.activeOps > 0) {
    log('info', 'health probe skipped, operations active', { activeOps: healthState.activeOps });
    return;
  }
  const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
  if (timeSinceSuccess < 120000) return;

  // Upper bound for the complete probe; comfortably above the 5s goto timeout
  // and well below the 60s interval so probes cannot pile up.
  const probeDeadlineMs = 15000;

  let testContext;
  let deadlineTimer;
  try {
    const probe = (async () => {
      testContext = await browser.newContext();
      const page = await testContext.newPage();
      await page.goto('about:blank', { timeout: 5000 });
      await page.close();
      await testContext.close();
    })();
    const deadline = new Promise((_, reject) => {
      deadlineTimer = setTimeout(
        () => reject(new Error(`health probe timed out after ${probeDeadlineMs}ms`)),
        probeDeadlineMs,
      );
    });
    // Promise.race subscribes to both promises, so a late rejection of `probe`
    // after the deadline fires does not surface as an unhandled rejection.
    await Promise.race([probe, deadline]);
    healthState.lastSuccessfulNav = Date.now();
  } catch (err) {
    log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
    // Best-effort cleanup only: do not await it, because closing a context on
    // a hung browser may never settle and must not delay the restart.
    if (testContext) testContext.close().catch(() => {});
    restartBrowser('health probe failed').catch((restartErr) => {
      log('error', 'browser restart after health probe failed', { error: restartErr.message });
    });
  } finally {
    clearTimeout(deadlineTimer);
  }
}, 60_000);
|
|
2093
|
+
|
|
1587
2094
|
// Crash logging
|
|
1588
2095
|
process.on('uncaughtException', (err) => {
|
|
1589
2096
|
log('error', 'uncaughtException', { error: err.message, stack: err.stack });
|