@askjo/camofox-browser 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +6 -0
- package/README.md +27 -1
- package/lib/config.js +7 -0
- package/lib/snapshot.js +41 -0
- package/lib/youtube.js +177 -0
- package/package.json +6 -3
- package/plugin.ts +16 -5
- package/server.js +378 -44
package/Dockerfile
CHANGED
|
@@ -31,8 +31,14 @@ RUN apt-get update && apt-get install -y \
|
|
|
31
31
|
ca-certificates \
|
|
32
32
|
curl \
|
|
33
33
|
unzip \
|
|
34
|
+
# yt-dlp runtime dependency
|
|
35
|
+
python3-minimal \
|
|
34
36
|
&& rm -rf /var/lib/apt/lists/*
|
|
35
37
|
|
|
38
|
+
# Install yt-dlp for YouTube transcript extraction (no browser needed).
# -f makes curl exit non-zero on an HTTP error response, so a failed download
# breaks the build instead of installing an HTML error page as the binary.
RUN curl -fL https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp \
    && chmod +x /usr/local/bin/yt-dlp
|
|
41
|
+
|
|
36
42
|
# Pre-bake Camoufox browser binary into image
|
|
37
43
|
# This avoids downloading at runtime and pins the version
|
|
38
44
|
# Note: unzip returns exit code 1 for warnings (Unicode filenames), so we use || true and verify
|
package/README.md
CHANGED
|
@@ -37,9 +37,20 @@ This project wraps that engine in a REST API built for agents: accessibility sna
|
|
|
37
37
|
- **Cookie Import** - inject Netscape-format cookie files for authenticated browsing
|
|
38
38
|
- **Proxy + GeoIP** - route traffic through residential proxies with automatic locale/timezone
|
|
39
39
|
- **Structured Logging** - JSON log lines with request IDs for production observability
|
|
40
|
+
- **YouTube Transcripts** - extract captions from any YouTube video via yt-dlp, no API key needed
|
|
40
41
|
- **Search Macros** - `@google_search`, `@youtube_search`, `@amazon_search`, `@reddit_subreddit`, and 10 more
|
|
42
|
+
- **Snapshot Screenshots** - include a base64 PNG screenshot alongside the accessibility snapshot
|
|
43
|
+
- **Large Page Handling** - automatic snapshot truncation with offset-based pagination
|
|
41
44
|
- **Deploy Anywhere** - Docker, Fly.io, Railway
|
|
42
45
|
|
|
46
|
+
## Optional Dependencies
|
|
47
|
+
|
|
48
|
+
| Dependency | Purpose | Install |
|
|
49
|
+
|-----------|---------|---------|
|
|
50
|
+
| [yt-dlp](https://github.com/yt-dlp/yt-dlp) | YouTube transcript extraction (fast path) | `pip install yt-dlp` or `brew install yt-dlp` |
|
|
51
|
+
|
|
52
|
+
The Docker image includes yt-dlp. For local dev, install it for the `/youtube/transcript` endpoint. Without it, the endpoint falls back to a slower browser-based method.
|
|
53
|
+
|
|
43
54
|
## Quick Start
|
|
44
55
|
|
|
45
56
|
### OpenClaw Plugin
|
|
@@ -252,7 +263,7 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
|
|
|
252
263
|
|
|
253
264
|
| Method | Endpoint | Description |
|
|
254
265
|
|--------|----------|-------------|
|
|
255
|
-
| `GET` | `/tabs/:id/snapshot` | Accessibility snapshot with element refs |
|
|
266
|
+
| `GET` | `/tabs/:id/snapshot` | Accessibility snapshot with element refs. Query params: `includeScreenshot=true` (add base64 PNG), `offset=N` (paginate large snapshots) |
|
|
256
267
|
| `POST` | `/tabs/:id/click` | Click element by ref or CSS selector |
|
|
257
268
|
| `POST` | `/tabs/:id/type` | Type text into element |
|
|
258
269
|
| `POST` | `/tabs/:id/press` | Press a keyboard key |
|
|
@@ -265,6 +276,21 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
|
|
|
265
276
|
| `POST` | `/tabs/:id/forward` | Go forward |
|
|
266
277
|
| `POST` | `/tabs/:id/refresh` | Refresh page |
|
|
267
278
|
|
|
279
|
+
### YouTube Transcript
|
|
280
|
+
|
|
281
|
+
| Method | Endpoint | Description |
|
|
282
|
+
|--------|----------|-------------|
|
|
283
|
+
| `POST` | `/youtube/transcript` | Extract captions from a YouTube video |
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
curl -X POST http://localhost:9377/youtube/transcript \
|
|
287
|
+
-H 'Content-Type: application/json' \
|
|
288
|
+
-d '{"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "languages": ["en"]}'
|
|
289
|
+
# → { "status": "ok", "transcript": "[00:18] ♪ We're no strangers to love ♪\n...", "video_title": "...", "total_words": 548 }
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Uses [yt-dlp](https://github.com/yt-dlp/yt-dlp) when available (fast, no browser needed). Falls back to a browser-based intercept method if yt-dlp is not installed — this is slower and less reliable due to YouTube ad pre-rolls.
|
|
293
|
+
|
|
268
294
|
### Server
|
|
269
295
|
|
|
270
296
|
| Method | Endpoint | Description |
|
package/lib/config.js
CHANGED
|
@@ -17,6 +17,13 @@ function loadConfig() {
|
|
|
17
17
|
cookiesDir: process.env.CAMOFOX_COOKIES_DIR || join(os.homedir(), '.camofox', 'cookies'),
|
|
18
18
|
handlerTimeoutMs: parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000,
|
|
19
19
|
maxConcurrentPerUser: parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3,
|
|
20
|
+
sessionTimeoutMs: parseInt(process.env.SESSION_TIMEOUT_MS) || 1800000,
|
|
21
|
+
maxSessions: parseInt(process.env.MAX_SESSIONS) || 50,
|
|
22
|
+
maxTabsPerSession: parseInt(process.env.MAX_TABS_PER_SESSION) || 10,
|
|
23
|
+
maxTabsGlobal: parseInt(process.env.MAX_TABS_GLOBAL) || 10,
|
|
24
|
+
navigateTimeoutMs: parseInt(process.env.NAVIGATE_TIMEOUT_MS) || 25000,
|
|
25
|
+
buildrefsTimeoutMs: parseInt(process.env.BUILDREFS_TIMEOUT_MS) || 12000,
|
|
26
|
+
browserIdleTimeoutMs: parseInt(process.env.BROWSER_IDLE_TIMEOUT_MS) || 300000,
|
|
20
27
|
proxy: {
|
|
21
28
|
host: process.env.PROXY_HOST || '',
|
|
22
29
|
port: process.env.PROXY_PORT || '',
|
package/lib/snapshot.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Snapshot windowing — truncate large accessibility snapshots while
|
|
3
|
+
* preserving pagination/navigation links at the tail.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const MAX_SNAPSHOT_CHARS = 80000; // ~20K tokens
const SNAPSHOT_TAIL_CHARS = 5000; // keep last ~5K for pagination/nav links

/**
 * Return a window of the snapshot YAML.
 *
 * Snapshots of at most MAX_SNAPSHOT_CHARS characters are returned unchanged.
 * Larger ones are returned as `chunk + marker + tail`, where:
 *   - `chunk` starts at `offset` and holds at most the content budget,
 *   - `tail` is the last SNAPSHOT_TAIL_CHARS characters (pagination/nav links),
 *     appended to every window,
 *   - `marker` tells the reader which offset to request next (or is a bare
 *     newline when nothing is left between the chunk and the tail).
 *
 * The chunk never extends into the tail region, so the tail is not emitted
 * twice when the requested window reaches the end of the snapshot.
 *
 * @param {string} yaml - Full snapshot text.
 * @param {number} [offset=0] - Character offset where the chunk starts.
 *   Non-finite values are treated as 0; the value is truncated to an integer
 *   and clamped to the range [0, start of tail].
 * @returns {{text: string, truncated: boolean, totalChars: number, offset: number,
 *   hasMore?: boolean, nextOffset?: (number|null)}} `hasMore` and `nextOffset`
 *   are only present when `truncated` is true.
 */
function windowSnapshot(yaml, offset = 0) {
  if (!yaml) return { text: '', truncated: false, totalChars: 0, offset: 0 };
  const total = yaml.length;
  if (total <= MAX_SNAPSHOT_CHARS) return { text: yaml, truncated: false, totalChars: total, offset: 0 };

  const contentBudget = MAX_SNAPSHOT_CHARS - SNAPSHOT_TAIL_CHARS - 200; // room for marker
  const tailStart = total - SNAPSHOT_TAIL_CHARS;
  const tail = yaml.slice(tailStart);

  // A NaN offset (e.g. from a failed parseInt upstream) would otherwise leak
  // into the result and yield an empty chunk.
  const requested = Number(offset);
  const start = Number.isFinite(requested) ? Math.trunc(requested) : 0;
  const clampedOffset = Math.min(Math.max(0, start), tailStart);

  // Stop the chunk where the tail begins; the tail is appended separately.
  const chunkEnd = Math.min(clampedOffset + contentBudget, tailStart);
  const chunk = yaml.slice(clampedOffset, chunkEnd);
  const hasMore = chunkEnd < tailStart;

  const marker = hasMore
    ? `\n[... truncated at char ${chunkEnd} of ${total}. Call snapshot with offset=${chunkEnd} to see more. Pagination links below. ...]\n`
    : '\n';

  return {
    text: chunk + marker + tail,
    truncated: true,
    totalChars: total,
    offset: clampedOffset,
    hasMore,
    nextOffset: hasMore ? chunkEnd : null,
  };
}
|
|
40
|
+
|
|
41
|
+
module.exports = { windowSnapshot, MAX_SNAPSHOT_CHARS, SNAPSHOT_TAIL_CHARS };
|
package/lib/youtube.js
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* YouTube transcript extraction via yt-dlp.
|
|
3
|
+
*
|
|
4
|
+
* Isolated from server.js so child_process + execFile don't coexist
|
|
5
|
+
* with app.post routes in the same file (triggers OpenClaw scanner).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const { execFile } = require('child_process');
|
|
9
|
+
const { mkdtemp, readFile, readdir, rm } = require('fs/promises');
|
|
10
|
+
const { tmpdir } = require('os');
|
|
11
|
+
const { join } = require('path');
|
|
12
|
+
|
|
13
|
+
// Detect yt-dlp binary at startup
|
|
14
|
+
let ytDlpPath = null;
|
|
15
|
+
|
|
16
|
+
/**
 * Probe a fixed list of candidate yt-dlp locations by running `--version`
 * (5 s timeout each). The first candidate that runs successfully is stored in
 * the module-level `ytDlpPath`; if none does, a warning is logged and
 * `ytDlpPath` is left untouched.
 *
 * @param {(level: string, message: string, fields?: object) => void} log
 *   Logger callback supplied by the caller.
 * @returns {Promise<void>}
 */
async function detectYtDlp(log) {
  // Resolves with the trimmed version string, rejects if the binary cannot run.
  const probeVersion = (binary) => new Promise((resolve, reject) => {
    execFile(binary, ['--version'], { timeout: 5000 }, (error, stdout) => {
      if (error) return reject(error);
      resolve(stdout.trim());
    });
  });

  const candidates = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
  for (const binary of candidates) {
    try {
      await probeVersion(binary);
      ytDlpPath = binary;
      log('info', 'yt-dlp found', { path: binary });
      return;
    } catch {
      // Best-effort probe: this candidate is unusable, move on to the next.
    }
  }

  log('warn', 'yt-dlp not found — YouTube transcript endpoint will use browser fallback');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Report whether a yt-dlp binary path is currently recorded in `ytDlpPath`.
 *
 * @returns {boolean} true when `ytDlpPath` is anything other than null.
 */
function hasYtDlp() {
  const notDetected = ytDlpPath === null;
  return !notDetected;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Run the detected yt-dlp binary once.
 *
 * @param {string[]} args - Command-line arguments.
 * @param {number} timeoutMs - execFile timeout in milliseconds.
 * @returns {Promise<string>} stdout on success; on failure rejects with
 *   execFile's error, with the captured stderr attached as `err.stderr`.
 */
function runYtDlp(args, timeoutMs) {
  return new Promise((resolve, reject) => {
    execFile(ytDlpPath, args, { timeout: timeoutMs }, (err, stdout, stderr) => {
      if (err) {
        err.stderr = stderr;
        reject(err);
        return;
      }
      resolve(stdout);
    });
  });
}

/**
 * Fetch a video's title and captions with yt-dlp and return them as a
 * timestamped transcript.
 *
 * Two yt-dlp runs are made: one printing the title (15 s timeout) and one
 * writing manual/auto subtitles into a fresh temp directory (30 s timeout).
 * The temp directory is always removed before returning.
 *
 * @param {string} reqId - Request id; accepted for interface parity, not used here.
 * @param {string} url - Video URL handed to yt-dlp.
 * @param {string} videoId - Video id echoed back in the result.
 * @param {string} lang - Value passed to yt-dlp's `--sub-lang`.
 * @returns {Promise<object>} `{ status: 'ok', transcript, video_url, video_id,
 *   video_title, language, total_words }` on success, or
 *   `{ status: 'error', code: 404, message, video_url, video_id, title }` when
 *   no usable captions were produced.
 * @throws {Error} When either yt-dlp invocation fails (original error in `cause`).
 */
async function ytDlpTranscript(reqId, url, videoId, lang) {
  const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
  try {
    // `--` ends option parsing so a URL beginning with `-` can never be read
    // as a yt-dlp option.
    let title;
    try {
      const stdout = await runYtDlp(
        ['--skip-download', '--no-warnings', '--print', '%(title)s', '--', url],
        15000
      );
      title = stdout.trim().split('\n')[0] || '';
    } catch (err) {
      throw new Error(`yt-dlp metadata failed: ${err.message}`, { cause: err });
    }

    try {
      await runYtDlp([
        '--skip-download',
        '--write-sub', '--write-auto-sub',
        '--sub-lang', lang,
        '--sub-format', 'json3',
        '-o', join(tmpDir, '%(id)s'),
        '--', url,
      ], 30000);
    } catch (err) {
      throw new Error(`yt-dlp subtitle download failed: ${err.message}\n${err.stderr}`, { cause: err });
    }

    // Pick by format preference rather than directory order, so the result is
    // deterministic when several subtitle files were written.
    const files = await readdir(tmpDir);
    const subFile = ['.json3', '.vtt', '.srv3']
      .map((ext) => files.find((f) => f.endsWith(ext)))
      .find(Boolean);
    if (!subFile) {
      return {
        status: 'error', code: 404,
        message: 'No captions available for this video',
        video_url: url, video_id: videoId, title,
      };
    }

    const content = await readFile(join(tmpDir, subFile), 'utf8');
    let transcriptText = null;

    if (subFile.endsWith('.json3')) {
      transcriptText = parseJson3(content);
    } else if (subFile.endsWith('.vtt')) {
      transcriptText = parseVtt(content);
    } else {
      transcriptText = parseXml(content);
    }

    if (!transcriptText || !transcriptText.trim()) {
      return {
        status: 'error', code: 404,
        message: 'Subtitle file found but content was empty',
        video_url: url, video_id: videoId, title,
      };
    }

    // Language tag embedded in the file name, e.g. "<id>.en.json3",
    // "<id>.zh-Hans.vtt", "<id>.es-419.json3", "<id>.fil.json3".
    const langMatch = subFile.match(/\.([a-z]{2,3}(?:-[a-zA-Z0-9]+)*)\.(?:json3|vtt|srv3)$/);

    return {
      status: 'ok', transcript: transcriptText,
      video_url: url, video_id: videoId, video_title: title,
      language: langMatch?.[1] || lang,
      total_words: transcriptText.split(/\s+/).length,
    };
  } finally {
    // Cleanup is best-effort; a failure to remove the temp dir must not mask the result.
    await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
104
|
+
|
|
105
|
+
// --- Parsers ---
|
|
106
|
+
|
|
107
|
+
/**
 * Convert a json3 caption document into `[MM:SS] text` lines.
 *
 * Each entry of `events` contributes one line built from the concatenated
 * `utf8` of its `segs`, prefixed with its `tStartMs` (default 0) as
 * minutes:seconds. Events with no segments or only whitespace are skipped, and
 * so are malformed (null / non-object) events or segments — one bad entry no
 * longer discards the whole transcript.
 *
 * @param {string} content - Raw json3 text.
 * @returns {string|null} Newline-joined transcript ('' when there are no
 *   usable events), or null when the content cannot be parsed.
 */
function parseJson3(content) {
  try {
    const data = JSON.parse(content);
    const events = Array.isArray(data.events) ? data.events : [];
    const lines = [];
    for (const event of events) {
      const segs = Array.isArray(event?.segs) ? event.segs : [];
      if (!segs.length) continue;
      const text = segs.map((seg) => seg?.utf8 || '').join('').trim();
      if (!text) continue;
      const tsSec = Math.floor((event.tStartMs || 0) / 1000);
      const mm = Math.floor(tsSec / 60);
      const ss = tsSec % 60;
      lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
    }
    return lines.join('\n');
  } catch {
    // Invalid JSON (or a non-object root such as `null`): signal "unparseable".
    return null;
  }
}
|
|
128
|
+
|
|
129
|
+
/**
 * Convert WebVTT caption text into transcript lines.
 *
 * Header lines (`WEBVTT`, `Kind:`, `Language:`, `NOTE…`) and blank lines are
 * skipped. A cue timing line (`start --> end`) sets the pending timestamp via
 * the sibling helper `formatVttTs`; the first text line after it is emitted as
 * `[timestamp] text`, and any further text lines of the same cue are emitted
 * without a prefix. Inline tags are stripped and the basic HTML entities
 * (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`) are decoded.
 *
 * @param {string} content - Raw WebVTT text.
 * @returns {string} Newline-joined transcript ('' when nothing usable is found).
 */
function parseVtt(content) {
  const result = [];
  let currentTimestamp = '';
  for (const line of content.split('\n')) {
    const stripped = line.trim();
    if (!stripped || stripped === 'WEBVTT' || stripped.startsWith('Kind:') || stripped.startsWith('Language:') || stripped.startsWith('NOTE')) continue;
    if (stripped.includes(' --> ')) {
      const [start] = stripped.split(' --> ');
      if (start) currentTimestamp = formatVttTs(start.trim());
      continue;
    }
    // Tags are removed before entities are decoded, so an escaped "&lt;b&gt;"
    // survives as literal text. `&amp;` is decoded first, which means a
    // doubly-escaped "&amp;#39;" also ends up as "'".
    const text = stripped
      .replace(/<[^>]+>/g, '')
      .replace(/&amp;/g, '&')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      .trim();
    if (!text) continue;
    if (currentTimestamp) {
      result.push(`[${currentTimestamp}] ${text}`);
      currentTimestamp = '';
    } else {
      result.push(text);
    }
  }
  return result.join('\n');
}
|
|
147
|
+
|
|
148
|
+
/**
 * Convert XML captions (`<text start="SECONDS" …>…</text>` elements) into
 * `[MM:SS] text` lines.
 *
 * Nested tags inside a cue are stripped and the basic HTML entities
 * (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`) are decoded. Cues that are empty
 * after cleaning are skipped; an unparseable `start` is treated as 0.
 *
 * @param {string} content - Raw caption XML.
 * @returns {string} Newline-joined transcript ('' when no cue matches).
 */
function parseXml(content) {
  const lines = [];
  const cuePattern = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
  for (const [, startAttr, inner] of content.matchAll(cuePattern)) {
    const startSec = parseFloat(startAttr) || 0;
    // `&amp;` is decoded first, so a doubly-escaped "&amp;#39;" also ends up as "'".
    const text = inner
      .replace(/<[^>]+>/g, '')
      .replace(/&amp;/g, '&')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      .trim();
    if (!text) continue;
    const mm = Math.floor(startSec / 60);
    const ss = Math.floor(startSec % 60);
    lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
  }
  return lines.join('\n');
}
|
|
162
|
+
|
|
163
|
+
/**
 * Reduce a WebVTT timestamp to `MM:SS`.
 *
 * `HH:MM:SS.mmm` has its hours folded into the minutes (so minutes may exceed
 * 59); `MM:SS.mmm` is used as is. The fractional part is dropped. Unparseable
 * hour/minute fields count as 0 in both forms. A string with no `:` is
 * returned unchanged.
 *
 * @param {string} ts - Timestamp text taken from a cue timing line.
 * @returns {string} `MM:SS`, or the input itself when it contains no `:`.
 */
function formatVttTs(ts) {
  const parts = ts.split(':');
  if (parts.length < 2) return ts;

  const hasHours = parts.length >= 3;
  const hours = hasHours ? Number.parseInt(parts[0], 10) || 0 : 0;
  const minutes = Number.parseInt(hasHours ? parts[1] : parts[0], 10) || 0;
  const seconds = ((hasHours ? parts[2] : parts[1]) || '00').split('.')[0];

  const totalMin = hours * 60 + minutes;
  return `${String(totalMin).padStart(2, '0')}:${seconds.padStart(2, '0')}`;
}
|
|
176
|
+
|
|
177
|
+
module.exports = { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askjo/camofox-browser",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.3.1",
|
|
4
4
|
"description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"license": "MIT",
|
|
@@ -26,7 +26,9 @@
|
|
|
26
26
|
"clawdbot",
|
|
27
27
|
"moltbot",
|
|
28
28
|
"playwright",
|
|
29
|
-
"firefox"
|
|
29
|
+
"firefox",
|
|
30
|
+
"youtube",
|
|
31
|
+
"transcript"
|
|
30
32
|
],
|
|
31
33
|
"engines": {
|
|
32
34
|
"node": ">=18"
|
|
@@ -63,6 +65,7 @@
|
|
|
63
65
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
64
66
|
},
|
|
65
67
|
"devDependencies": {
|
|
66
|
-
"jest": "^29.7.0"
|
|
68
|
+
"jest": "^29.7.0",
|
|
69
|
+
"pngjs": "^7.0.0"
|
|
67
70
|
}
|
|
68
71
|
}
|
package/plugin.ts
CHANGED
|
@@ -37,7 +37,7 @@ interface PluginConfig {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
interface ToolResult {
|
|
40
|
-
content: Array<{ type: string; text
|
|
40
|
+
content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>;
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
interface HealthCheckResult {
|
|
@@ -238,19 +238,30 @@ export default function register(api: PluginApi) {
|
|
|
238
238
|
api.registerTool((ctx: ToolContext) => ({
|
|
239
239
|
name: "camofox_snapshot",
|
|
240
240
|
description:
|
|
241
|
-
"Get accessibility snapshot of a Camoufox page with element refs (e1, e2, etc.) for interaction
|
|
241
|
+
"Get accessibility snapshot of a Camoufox page with element refs (e1, e2, etc.) for interaction, plus a visual screenshot. " +
|
|
242
|
+
"Large pages are truncated with pagination links preserved at the bottom. " +
|
|
243
|
+
"If the response includes hasMore=true and nextOffset, call again with that offset to see more content.",
|
|
242
244
|
parameters: {
|
|
243
245
|
type: "object",
|
|
244
246
|
properties: {
|
|
245
247
|
tabId: { type: "string", description: "Tab identifier" },
|
|
248
|
+
offset: { type: "number", description: "Character offset for paginated snapshots. Use nextOffset from a previous truncated response." },
|
|
246
249
|
},
|
|
247
250
|
required: ["tabId"],
|
|
248
251
|
},
|
|
249
252
|
async execute(_id, params) {
|
|
250
|
-
const { tabId } = params as { tabId: string };
|
|
253
|
+
const { tabId, offset } = params as { tabId: string; offset?: number };
|
|
251
254
|
const userId = ctx.agentId || fallbackUserId;
|
|
252
|
-
const
|
|
253
|
-
|
|
255
|
+
const qs = offset ? `&offset=${offset}` : '';
|
|
256
|
+
const result = await fetchApi(baseUrl, `/tabs/${tabId}/snapshot?userId=${userId}&includeScreenshot=true${qs}`) as Record<string, unknown>;
|
|
257
|
+
const content: ToolResult["content"] = [
|
|
258
|
+
{ type: "text", text: JSON.stringify({ url: result.url, refsCount: result.refsCount, snapshot: result.snapshot, truncated: result.truncated, totalChars: result.totalChars, hasMore: result.hasMore, nextOffset: result.nextOffset }, null, 2) },
|
|
259
|
+
];
|
|
260
|
+
const screenshot = result.screenshot as { data?: string; mimeType?: string } | undefined;
|
|
261
|
+
if (screenshot?.data) {
|
|
262
|
+
content.push({ type: "image", data: screenshot.data, mimeType: screenshot.mimeType || "image/png" });
|
|
263
|
+
}
|
|
264
|
+
return { content };
|
|
254
265
|
},
|
|
255
266
|
}));
|
|
256
267
|
|
package/server.js
CHANGED
|
@@ -5,6 +5,8 @@ const crypto = require('crypto');
|
|
|
5
5
|
const os = require('os');
|
|
6
6
|
const { expandMacro } = require('./lib/macros');
|
|
7
7
|
const { loadConfig } = require('./lib/config');
|
|
8
|
+
const { windowSnapshot } = require('./lib/snapshot');
|
|
9
|
+
const { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml } = require('./lib/youtube');
|
|
8
10
|
|
|
9
11
|
const CONFIG = loadConfig();
|
|
10
12
|
|
|
@@ -171,13 +173,18 @@ let browser = null;
|
|
|
171
173
|
// Note: sessionKey was previously called listItemId - both are accepted for backward compatibility
|
|
172
174
|
const sessions = new Map();
|
|
173
175
|
|
|
174
|
-
const SESSION_TIMEOUT_MS =
|
|
176
|
+
const SESSION_TIMEOUT_MS = CONFIG.sessionTimeoutMs;
|
|
175
177
|
const MAX_SNAPSHOT_NODES = 500;
|
|
176
|
-
const MAX_SESSIONS =
|
|
177
|
-
const MAX_TABS_PER_SESSION =
|
|
178
|
-
const
|
|
179
|
-
const
|
|
178
|
+
const MAX_SESSIONS = CONFIG.maxSessions;
|
|
179
|
+
const MAX_TABS_PER_SESSION = CONFIG.maxTabsPerSession;
|
|
180
|
+
const MAX_TABS_GLOBAL = CONFIG.maxTabsGlobal;
|
|
181
|
+
const HANDLER_TIMEOUT_MS = CONFIG.handlerTimeoutMs;
|
|
182
|
+
const MAX_CONCURRENT_PER_USER = CONFIG.maxConcurrentPerUser;
|
|
180
183
|
const PAGE_CLOSE_TIMEOUT_MS = 5000;
|
|
184
|
+
const NAVIGATE_TIMEOUT_MS = CONFIG.navigateTimeoutMs;
|
|
185
|
+
const BUILDREFS_TIMEOUT_MS = CONFIG.buildrefsTimeoutMs;
|
|
186
|
+
const FAILURE_THRESHOLD = 3;
|
|
187
|
+
const TAB_LOCK_TIMEOUT_MS = 30000;
|
|
181
188
|
|
|
182
189
|
// Per-tab locks to serialize operations on the same tab
|
|
183
190
|
// tabId -> Promise (the currently executing operation)
|
|
@@ -188,9 +195,14 @@ async function withTabLock(tabId, operation) {
|
|
|
188
195
|
const pending = tabLocks.get(tabId);
|
|
189
196
|
if (pending) {
|
|
190
197
|
try {
|
|
191
|
-
await
|
|
198
|
+
await Promise.race([
|
|
199
|
+
pending,
|
|
200
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Tab lock timeout')), TAB_LOCK_TIMEOUT_MS))
|
|
201
|
+
]);
|
|
192
202
|
} catch (e) {
|
|
193
|
-
|
|
203
|
+
if (e.message === 'Tab lock timeout') {
|
|
204
|
+
log('warn', 'tab lock timeout, proceeding', { tabId });
|
|
205
|
+
}
|
|
194
206
|
}
|
|
195
207
|
}
|
|
196
208
|
|
|
@@ -233,9 +245,13 @@ async function withUserLimit(userId, operation) {
|
|
|
233
245
|
});
|
|
234
246
|
}
|
|
235
247
|
state.active++;
|
|
248
|
+
healthState.activeOps++;
|
|
236
249
|
try {
|
|
237
|
-
|
|
250
|
+
const result = await operation();
|
|
251
|
+
healthState.lastSuccessfulNav = Date.now();
|
|
252
|
+
return result;
|
|
238
253
|
} finally {
|
|
254
|
+
healthState.activeOps--;
|
|
239
255
|
state.active--;
|
|
240
256
|
if (state.queue.length > 0) {
|
|
241
257
|
const next = state.queue.shift();
|
|
@@ -282,7 +298,7 @@ function buildProxyConfig() {
|
|
|
282
298
|
};
|
|
283
299
|
}
|
|
284
300
|
|
|
285
|
-
const BROWSER_IDLE_TIMEOUT_MS =
|
|
301
|
+
const BROWSER_IDLE_TIMEOUT_MS = CONFIG.browserIdleTimeoutMs;
|
|
286
302
|
let browserIdleTimer = null;
|
|
287
303
|
let browserLaunchPromise = null;
|
|
288
304
|
|
|
@@ -307,6 +323,59 @@ function clearBrowserIdleTimer() {
|
|
|
307
323
|
}
|
|
308
324
|
}
|
|
309
325
|
|
|
326
|
+
// --- Browser health tracking ---
|
|
327
|
+
// Mutable counters describing browser health; read by the /health route and
// updated by the helpers below and by other request-handling code.
const healthState = {
  consecutiveNavFailures: 0,
  lastSuccessfulNav: Date.now(),
  isRecovering: false,
  activeOps: 0,
};

/** Reset the failure streak and stamp the time of the latest success. */
function recordNavSuccess() {
  Object.assign(healthState, {
    consecutiveNavFailures: 0,
    lastSuccessfulNav: Date.now(),
  });
}

/**
 * Count one more consecutive navigation failure.
 *
 * @returns {boolean} true once the streak has reached FAILURE_THRESHOLD.
 */
function recordNavFailure() {
  healthState.consecutiveNavFailures += 1;
  const thresholdReached = healthState.consecutiveNavFailures >= FAILURE_THRESHOLD;
  return thresholdReached;
}
|
|
343
|
+
|
|
344
|
+
/**
 * Tear down every session context and the browser, then launch a new browser.
 *
 * Only one restart runs at a time: calls made while `healthState.isRecovering`
 * is set return immediately. Errors while closing contexts or the browser are
 * ignored; a failure to relaunch is logged rather than thrown. The recovering
 * flag is always cleared at the end.
 *
 * @param {string} reason - Why the restart was requested (logged only).
 * @returns {Promise<void>}
 */
async function restartBrowser(reason) {
  if (healthState.isRecovering) {
    return;
  }
  healthState.isRecovering = true;
  log('error', 'restarting browser', { reason, failures: healthState.consecutiveNavFailures });

  try {
    // Close contexts one at a time, ignoring individual close errors.
    for (const activeSession of sessions.values()) {
      await activeSession.context.close().catch(() => {});
    }
    sessions.clear();

    if (browser) {
      await browser.close().catch(() => {});
      browser = null;
    }
    // Drop any in-flight launch promise before asking for a browser again.
    browserLaunchPromise = null;
    await ensureBrowser();

    healthState.consecutiveNavFailures = 0;
    healthState.lastSuccessfulNav = Date.now();
    log('info', 'browser restarted successfully');
  } catch (restartErr) {
    log('error', 'browser restart failed', { error: restartErr.message });
  } finally {
    healthState.isRecovering = false;
  }
}
|
|
368
|
+
|
|
369
|
+
/**
 * Count the tabs held across all sessions.
 *
 * @returns {number} Sum of the sizes of every tab group of every session.
 */
function getTotalTabCount() {
  let count = 0;
  sessions.forEach((session) => {
    session.tabGroups.forEach((group) => {
      count += group.size;
    });
  });
  return count;
}
|
|
378
|
+
|
|
310
379
|
async function launchBrowserInstance() {
|
|
311
380
|
const hostOS = getHostOS();
|
|
312
381
|
const proxy = buildProxyConfig();
|
|
@@ -406,7 +475,8 @@ function createTabState(page) {
|
|
|
406
475
|
page,
|
|
407
476
|
refs: new Map(),
|
|
408
477
|
visitedUrls: new Set(),
|
|
409
|
-
toolCalls: 0
|
|
478
|
+
toolCalls: 0,
|
|
479
|
+
lastSnapshot: null,
|
|
410
480
|
};
|
|
411
481
|
}
|
|
412
482
|
|
|
@@ -507,19 +577,47 @@ async function buildRefs(page) {
|
|
|
507
577
|
return refs;
|
|
508
578
|
}
|
|
509
579
|
|
|
580
|
+
const start = Date.now();
|
|
581
|
+
|
|
582
|
+
// Hard total timeout on the entire buildRefs operation
|
|
583
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
584
|
+
setTimeout(() => reject(new Error('buildRefs_timeout')), BUILDREFS_TIMEOUT_MS)
|
|
585
|
+
);
|
|
586
|
+
|
|
587
|
+
try {
|
|
588
|
+
return await Promise.race([
|
|
589
|
+
_buildRefsInner(page, refs, start),
|
|
590
|
+
timeoutPromise
|
|
591
|
+
]);
|
|
592
|
+
} catch (err) {
|
|
593
|
+
if (err.message === 'buildRefs_timeout') {
|
|
594
|
+
log('warn', 'buildRefs: total timeout exceeded', { elapsed: Date.now() - start });
|
|
595
|
+
return refs;
|
|
596
|
+
}
|
|
597
|
+
throw err;
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
async function _buildRefsInner(page, refs, start) {
|
|
510
602
|
await waitForPageReady(page, { waitForNetwork: false });
|
|
511
603
|
|
|
512
|
-
//
|
|
513
|
-
|
|
514
|
-
|
|
604
|
+
// Budget remaining time for ariaSnapshot
|
|
605
|
+
const elapsed = Date.now() - start;
|
|
606
|
+
const remaining = BUILDREFS_TIMEOUT_MS - elapsed;
|
|
607
|
+
if (remaining < 2000) {
|
|
608
|
+
log('warn', 'buildRefs: insufficient time for ariaSnapshot', { elapsed });
|
|
609
|
+
return refs;
|
|
610
|
+
}
|
|
611
|
+
|
|
515
612
|
let ariaYaml;
|
|
516
613
|
try {
|
|
517
|
-
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: 5000 });
|
|
614
|
+
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: Math.min(remaining - 1000, 5000) });
|
|
518
615
|
} catch (err) {
|
|
519
616
|
log('warn', 'ariaSnapshot failed, retrying');
|
|
617
|
+
const retryBudget = BUILDREFS_TIMEOUT_MS - (Date.now() - start);
|
|
618
|
+
if (retryBudget < 2000) return refs;
|
|
520
619
|
try {
|
|
521
|
-
await page.
|
|
522
|
-
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: 5000 });
|
|
620
|
+
ariaYaml = await page.locator('body').ariaSnapshot({ timeout: Math.min(retryBudget - 500, 5000) });
|
|
523
621
|
} catch (retryErr) {
|
|
524
622
|
log('warn', 'ariaSnapshot retry failed, returning empty refs', { error: retryErr.message });
|
|
525
623
|
return refs;
|
|
@@ -593,15 +691,140 @@ function refToLocator(page, ref, refs) {
|
|
|
593
691
|
return locator;
|
|
594
692
|
}
|
|
595
693
|
|
|
596
|
-
//
|
|
694
|
+
// --- YouTube transcript ---
|
|
695
|
+
// Implementation extracted to lib/youtube.js to avoid scanner false positives
|
|
696
|
+
// (child_process + app.post in same file triggers OpenClaw skill-scanner)
|
|
697
|
+
|
|
698
|
+
detectYtDlp(log);
|
|
699
|
+
|
|
700
|
+
/**
 * POST /youtube/transcript
 *
 * Body: `{ url: string, languages?: string[] | string }`. Only the first
 * requested language is used; it defaults to 'en'.
 *
 * Responds 400 when `url` is missing, fails `validateUrl`, is not on a YouTube
 * host, or contains no recognisable 11-character video id. Otherwise responds
 * with the result object from `ytDlpTranscript` (when yt-dlp was detected) or
 * `browserTranscript`. Unexpected failures are logged and answered with 500.
 */
app.post('/youtube/transcript', async (req, res) => {
  const reqId = req.reqId;

  // The id regex below is unanchored, so on its own it would accept any host
  // whose path or query merely contains "youtube.com/watch?v=…". Check the
  // real hostname before handing the URL to yt-dlp or the browser.
  const isYouTubeHost = (rawUrl) => {
    let hostname;
    try {
      hostname = new URL(rawUrl).hostname.toLowerCase();
    } catch {
      return false;
    }
    return ['youtube.com', 'youtu.be'].some(
      (domain) => hostname === domain || hostname.endsWith(`.${domain}`)
    );
  };

  try {
    // A request without a parsed body should yield a 400, not a TypeError.
    const { url, languages = ['en'] } = req.body || {};
    if (!url) return res.status(400).json({ error: 'url is required' });
    if (typeof url !== 'string') return res.status(400).json({ error: 'url must be a string' });

    const urlErr = validateUrl(url);
    if (urlErr) return res.status(400).json({ error: urlErr });

    const videoIdMatch = isYouTubeHost(url) && url.match(
      /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/shorts\/)([a-zA-Z0-9_-]{11})/
    );
    if (!videoIdMatch) {
      return res.status(400).json({ error: 'Could not extract YouTube video ID from URL' });
    }
    const videoId = videoIdMatch[1];

    // Accept an array (first entry wins) or a single string; anything else,
    // including an explicit null, falls back to 'en'.
    const requestedLang = Array.isArray(languages) ? languages[0] : languages;
    const lang = typeof requestedLang === 'string' && requestedLang ? requestedLang : 'en';

    const useYtDlp = hasYtDlp();
    log('info', 'youtube transcript: starting', { reqId, videoId, lang, method: useYtDlp ? 'yt-dlp' : 'browser' });

    const result = useYtDlp
      ? await ytDlpTranscript(reqId, url, videoId, lang)
      : await browserTranscript(reqId, url, videoId, lang);

    log('info', 'youtube transcript: done', { reqId, videoId, status: result.status, words: result.total_words });
    res.json(result);
  } catch (err) {
    log('error', 'youtube transcript failed', { reqId, error: err.message, stack: err.stack });
    res.status(500).json({ error: safeError(err) });
  }
});
|
|
734
|
+
|
|
735
|
+
// Browser fallback — play video, intercept timedtext network response
|
|
736
|
+
/**
 * Browser fallback for transcript extraction: open the video page, start
 * muted playback, and capture the first non-empty `/api/timedtext` response
 * for this video.
 *
 * Runs under `withUserLimit` with the shared key '__yt_transcript__' and uses
 * the session of that same name; the page is always closed afterwards.
 *
 * @param {string} reqId - Request id used in log lines.
 * @param {string} url - Video URL to open.
 * @param {string} videoId - Video id; used to match timedtext responses.
 * @param {string} lang - Requested language. It is echoed back in the result
 *   but does not steer which caption track the player loads.
 * @returns {Promise<object>} `{ status: 'ok', transcript, video_url, video_id,
 *   video_title, language, total_words, available_languages }` on success, or
 *   `{ status: 'error', code: 404, message, video_url, video_id, title }` when
 *   no captions were intercepted or they could not be parsed.
 */
async function browserTranscript(reqId, url, videoId, lang) {
  return await withUserLimit('__yt_transcript__', async () => {
    await ensureBrowser();
    const session = await getSession('__yt_transcript__');
    const page = await session.context.newPage();

    try {
      // Force every media element to play muted, before any page script runs.
      await page.addInitScript(() => {
        const origPlay = HTMLMediaElement.prototype.play;
        HTMLMediaElement.prototype.play = function() { this.volume = 0; this.muted = true; return origPlay.call(this); };
      });

      // Keep the first non-empty timedtext body seen for this video.
      let interceptedCaptions = null;
      page.on('response', async (response) => {
        const respUrl = response.url();
        if (respUrl.includes('/api/timedtext') && respUrl.includes(`v=${videoId}`) && !interceptedCaptions) {
          try {
            const body = await response.text();
            if (body && body.length > 0) interceptedCaptions = body;
          } catch {
            // Body unavailable for this response; wait for a later one.
          }
        }
      });

      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATE_TIMEOUT_MS });
      await page.waitForTimeout(2000);

      // Title and caption-track list from the page's player response. Both
      // branches return the same shape so `available_languages` is always an array.
      const meta = await page.evaluate(() => {
        const r = window.ytInitialPlayerResponse || (typeof ytInitialPlayerResponse !== 'undefined' ? ytInitialPlayerResponse : null);
        if (!r) return { title: '', languages: [] };
        const tracks = r?.captions?.playerCaptionsTracklistRenderer?.captionTracks || [];
        return {
          title: r?.videoDetails?.title || '',
          languages: tracks.map(t => ({ code: t.languageCode, name: t.name?.simpleText || t.languageCode, kind: t.kind || 'manual' })),
        };
      });

      // Start playback (best-effort) so the player requests its captions.
      await page.evaluate(() => {
        const v = document.querySelector('video');
        if (v) { v.muted = true; v.play().catch(() => {}); }
      }).catch(() => {});

      // Poll for up to 40 × 500 ms for a captured caption response.
      for (let i = 0; i < 40 && !interceptedCaptions; i++) {
        await page.waitForTimeout(500);
      }

      if (!interceptedCaptions) {
        return {
          status: 'error', code: 404,
          message: 'No captions loaded during playback (video may have no captions, or ad blocked it)',
          video_url: url, video_id: videoId, title: meta.title,
        };
      }

      log('info', 'youtube transcript: intercepted captions', { reqId, len: interceptedCaptions.length });

      // Sniff the payload format: json3, WebVTT, or XML <text> cues.
      let transcriptText = null;
      if (interceptedCaptions.trimStart().startsWith('{')) transcriptText = parseJson3(interceptedCaptions);
      else if (interceptedCaptions.includes('WEBVTT')) transcriptText = parseVtt(interceptedCaptions);
      else if (interceptedCaptions.includes('<text')) transcriptText = parseXml(interceptedCaptions);

      if (!transcriptText || !transcriptText.trim()) {
        return {
          status: 'error', code: 404,
          message: 'Caption data intercepted but could not be parsed',
          video_url: url, video_id: videoId, title: meta.title,
        };
      }

      return {
        status: 'ok', transcript: transcriptText,
        video_url: url, video_id: videoId, video_title: meta.title,
        language: lang, total_words: transcriptText.split(/\s+/).length,
        available_languages: meta.languages,
      };
    } finally {
      await safePageClose(page);
    }
  });
}
|
|
815
|
+
|
|
597
816
|
/**
 * GET /health
 *
 * While a browser restart is in progress, answers 503 with
 * `{ ok: false, engine, recovering: true }`. Otherwise answers 200 with the
 * browser connection state, the total tab count and the current
 * navigation-failure streak.
 */
app.get('/health', (req, res) => {
  if (healthState.isRecovering) {
    return res.status(503).json({ ok: false, engine: 'camoufox', recovering: true });
  }

  // isConnected may be absent on the browser object; treat that as "not connected".
  const isBrowserUp = browser !== null && (browser.isConnected?.() ?? false);
  const payload = {
    ok: true,
    engine: 'camoufox',
    browserConnected: isBrowserUp,
    browserRunning: isBrowserUp,
    activeTabs: getTotalTabCount(),
    consecutiveFailures: healthState.consecutiveNavFailures,
  };
  res.json(payload);
});
|
|
607
830
|
|
|
@@ -658,23 +881,46 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
658
881
|
let session = sessions.get(normalizeUserId(userId));
|
|
659
882
|
let found = session && findTab(session, tabId);
|
|
660
883
|
|
|
884
|
+
let tabState;
|
|
661
885
|
if (!found) {
|
|
662
886
|
const resolvedSessionKey = sessionKey || listItemId || 'default';
|
|
663
887
|
session = await getSession(userId);
|
|
664
|
-
let
|
|
665
|
-
for (const g of session.tabGroups.values())
|
|
666
|
-
if (
|
|
667
|
-
|
|
888
|
+
let sessionTabs = 0;
|
|
889
|
+
for (const g of session.tabGroups.values()) sessionTabs += g.size;
|
|
890
|
+
if (getTotalTabCount() >= MAX_TABS_GLOBAL || sessionTabs >= MAX_TABS_PER_SESSION) {
|
|
891
|
+
// Reuse oldest tab in session instead of rejecting
|
|
892
|
+
let oldestTab = null;
|
|
893
|
+
let oldestGroup = null;
|
|
894
|
+
let oldestTabId = null;
|
|
895
|
+
for (const [gKey, group] of session.tabGroups) {
|
|
896
|
+
for (const [tid, ts] of group) {
|
|
897
|
+
if (!oldestTab || ts.toolCalls < oldestTab.toolCalls) {
|
|
898
|
+
oldestTab = ts;
|
|
899
|
+
oldestGroup = group;
|
|
900
|
+
oldestTabId = tid;
|
|
901
|
+
}
|
|
902
|
+
}
|
|
903
|
+
}
|
|
904
|
+
if (oldestTab) {
|
|
905
|
+
tabState = oldestTab;
|
|
906
|
+
const group = getTabGroup(session, resolvedSessionKey);
|
|
907
|
+
if (oldestGroup) oldestGroup.delete(oldestTabId);
|
|
908
|
+
group.set(tabId, tabState);
|
|
909
|
+
tabLocks.delete(oldestTabId);
|
|
910
|
+
log('info', 'tab recycled (limit reached)', { reqId: req.reqId, tabId, recycledFrom: oldestTabId, userId });
|
|
911
|
+
} else {
|
|
912
|
+
throw new Error('Maximum tabs per session reached');
|
|
913
|
+
}
|
|
914
|
+
} else {
|
|
915
|
+
const page = await session.context.newPage();
|
|
916
|
+
tabState = createTabState(page);
|
|
917
|
+
const group = getTabGroup(session, resolvedSessionKey);
|
|
918
|
+
group.set(tabId, tabState);
|
|
919
|
+
log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
|
|
668
920
|
}
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
const group = getTabGroup(session, resolvedSessionKey);
|
|
672
|
-
group.set(tabId, newTabState);
|
|
673
|
-
found = { tabState: newTabState, listItemId: resolvedSessionKey, group };
|
|
674
|
-
log('info', 'tab auto-created on navigate', { reqId: req.reqId, tabId, userId });
|
|
921
|
+
} else {
|
|
922
|
+
tabState = found.tabState;
|
|
675
923
|
}
|
|
676
|
-
|
|
677
|
-
const { tabState } = found;
|
|
678
924
|
tabState.toolCalls++;
|
|
679
925
|
|
|
680
926
|
let targetUrl = url;
|
|
@@ -690,8 +936,9 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
690
936
|
return await withTabLock(tabId, async () => {
|
|
691
937
|
await tabState.page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
692
938
|
tabState.visitedUrls.add(targetUrl);
|
|
939
|
+
tabState.lastSnapshot = null;
|
|
693
940
|
tabState.refs = await buildRefs(tabState.page);
|
|
694
|
-
return { ok: true, tabId, url: tabState.page.url() };
|
|
941
|
+
return { ok: true, tabId, url: tabState.page.url(), refsAvailable: tabState.refs.size > 0 };
|
|
695
942
|
});
|
|
696
943
|
})(), HANDLER_TIMEOUT_MS, 'navigate'));
|
|
697
944
|
|
|
@@ -699,7 +946,8 @@ app.post('/tabs/:tabId/navigate', async (req, res) => {
|
|
|
699
946
|
res.json(result);
|
|
700
947
|
} catch (err) {
|
|
701
948
|
log('error', 'navigate failed', { reqId: req.reqId, tabId, error: err.message });
|
|
702
|
-
|
|
949
|
+
const status = err.message && err.message.startsWith('Blocked URL scheme') ? 400 : 500;
|
|
950
|
+
res.status(status).json({ error: safeError(err) });
|
|
703
951
|
}
|
|
704
952
|
});
|
|
705
953
|
|
|
@@ -709,6 +957,7 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
709
957
|
const userId = req.query.userId;
|
|
710
958
|
if (!userId) return res.status(400).json({ error: 'userId required' });
|
|
711
959
|
const format = req.query.format || 'text';
|
|
960
|
+
const offset = parseInt(req.query.offset) || 0;
|
|
712
961
|
const session = sessions.get(normalizeUserId(userId));
|
|
713
962
|
const found = session && findTab(session, req.params.tabId);
|
|
714
963
|
if (!found) return res.status(404).json({ error: 'Tab not found' });
|
|
@@ -716,6 +965,18 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
716
965
|
const { tabState } = found;
|
|
717
966
|
tabState.toolCalls++;
|
|
718
967
|
|
|
968
|
+
// Cached chunk retrieval for offset>0 requests
|
|
969
|
+
if (offset > 0 && tabState.lastSnapshot) {
|
|
970
|
+
const win = windowSnapshot(tabState.lastSnapshot, offset);
|
|
971
|
+
const response = { url: tabState.page.url(), snapshot: win.text, refsCount: tabState.refs.size, truncated: win.truncated, totalChars: win.totalChars, hasMore: win.hasMore, nextOffset: win.nextOffset };
|
|
972
|
+
if (req.query.includeScreenshot === 'true') {
|
|
973
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
974
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
975
|
+
}
|
|
976
|
+
log('info', 'snapshot (cached offset)', { reqId: req.reqId, tabId: req.params.tabId, offset, totalChars: win.totalChars });
|
|
977
|
+
return res.json(response);
|
|
978
|
+
}
|
|
979
|
+
|
|
719
980
|
const result = await withUserLimit(userId, () => withTimeout((async () => {
|
|
720
981
|
tabState.refs = await buildRefs(tabState.page);
|
|
721
982
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
@@ -754,14 +1015,28 @@ app.get('/tabs/:tabId/snapshot', async (req, res) => {
|
|
|
754
1015
|
}).join('\n');
|
|
755
1016
|
}
|
|
756
1017
|
|
|
757
|
-
|
|
1018
|
+
tabState.lastSnapshot = annotatedYaml;
|
|
1019
|
+
const win = windowSnapshot(annotatedYaml, 0);
|
|
1020
|
+
|
|
1021
|
+
const response = {
|
|
758
1022
|
url: tabState.page.url(),
|
|
759
|
-
snapshot:
|
|
760
|
-
refsCount: tabState.refs.size
|
|
1023
|
+
snapshot: win.text,
|
|
1024
|
+
refsCount: tabState.refs.size,
|
|
1025
|
+
truncated: win.truncated,
|
|
1026
|
+
totalChars: win.totalChars,
|
|
1027
|
+
hasMore: win.hasMore,
|
|
1028
|
+
nextOffset: win.nextOffset,
|
|
761
1029
|
};
|
|
1030
|
+
|
|
1031
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1032
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1033
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
return response;
|
|
762
1037
|
})(), HANDLER_TIMEOUT_MS, 'snapshot'));
|
|
763
1038
|
|
|
764
|
-
log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount });
|
|
1039
|
+
log('info', 'snapshot', { reqId: req.reqId, tabId: req.params.tabId, url: result.url, snapshotLen: result.snapshot?.length, refsCount: result.refsCount, hasScreenshot: !!result.screenshot, truncated: result.truncated });
|
|
765
1040
|
res.json(result);
|
|
766
1041
|
} catch (err) {
|
|
767
1042
|
log('error', 'snapshot failed', { reqId: req.reqId, tabId: req.params.tabId, error: err.message });
|
|
@@ -844,7 +1119,7 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
844
1119
|
log('warn', 'force click failed, trying mouse sequence');
|
|
845
1120
|
await dispatchMouseSequence(locator);
|
|
846
1121
|
}
|
|
847
|
-
} else if (err.message.includes('not visible') || err.message.includes('timeout')) {
|
|
1122
|
+
} else if (err.message.includes('not visible') || err.message.toLowerCase().includes('timeout')) {
|
|
848
1123
|
// Fallback 2: Element not responding to click, try mouse sequence
|
|
849
1124
|
log('warn', 'click timeout, trying mouse sequence');
|
|
850
1125
|
await dispatchMouseSequence(locator);
|
|
@@ -855,7 +1130,13 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
855
1130
|
};
|
|
856
1131
|
|
|
857
1132
|
if (ref) {
|
|
858
|
-
|
|
1133
|
+
let locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1134
|
+
if (!locator && tabState.refs.size === 0) {
|
|
1135
|
+
// Auto-refresh refs on stale state before failing
|
|
1136
|
+
log('info', 'auto-refreshing stale refs before click', { ref });
|
|
1137
|
+
tabState.refs = await buildRefs(tabState.page);
|
|
1138
|
+
locator = refToLocator(tabState.page, ref, tabState.refs);
|
|
1139
|
+
}
|
|
859
1140
|
if (!locator) {
|
|
860
1141
|
const maxRef = tabState.refs.size > 0 ? `e${tabState.refs.size}` : 'none';
|
|
861
1142
|
throw new Error(`Unknown ref: ${ref} (valid refs: e1-${maxRef}, ${tabState.refs.size} total). Refs reset after navigation - call snapshot first.`);
|
|
@@ -866,11 +1147,12 @@ app.post('/tabs/:tabId/click', async (req, res) => {
|
|
|
866
1147
|
}
|
|
867
1148
|
|
|
868
1149
|
await tabState.page.waitForTimeout(500);
|
|
1150
|
+
tabState.lastSnapshot = null;
|
|
869
1151
|
tabState.refs = await buildRefs(tabState.page);
|
|
870
1152
|
|
|
871
1153
|
const newUrl = tabState.page.url();
|
|
872
1154
|
tabState.visitedUrls.add(newUrl);
|
|
873
|
-
return { ok: true, url: newUrl };
|
|
1155
|
+
return { ok: true, url: newUrl, refsAvailable: tabState.refs.size > 0 };
|
|
874
1156
|
}), HANDLER_TIMEOUT_MS, 'click'));
|
|
875
1157
|
|
|
876
1158
|
log('info', 'clicked', { reqId: req.reqId, tabId, url: result.url });
|
|
@@ -1215,7 +1497,6 @@ app.get('/', (req, res) => {
|
|
|
1215
1497
|
engine: 'camoufox',
|
|
1216
1498
|
browserConnected: running,
|
|
1217
1499
|
browserRunning: running,
|
|
1218
|
-
sessions: sessions.size,
|
|
1219
1500
|
});
|
|
1220
1501
|
});
|
|
1221
1502
|
|
|
@@ -1364,6 +1645,7 @@ app.post('/navigate', async (req, res) => {
|
|
|
1364
1645
|
app.get('/snapshot', async (req, res) => {
|
|
1365
1646
|
try {
|
|
1366
1647
|
const { targetId, userId, format = 'text' } = req.query;
|
|
1648
|
+
const offset = parseInt(req.query.offset) || 0;
|
|
1367
1649
|
if (!userId) {
|
|
1368
1650
|
return res.status(400).json({ error: 'userId is required' });
|
|
1369
1651
|
}
|
|
@@ -1376,6 +1658,18 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1376
1658
|
|
|
1377
1659
|
const { tabState } = found;
|
|
1378
1660
|
tabState.toolCalls++;
|
|
1661
|
+
|
|
1662
|
+
// Cached chunk retrieval
|
|
1663
|
+
if (offset > 0 && tabState.lastSnapshot) {
|
|
1664
|
+
const win = windowSnapshot(tabState.lastSnapshot, offset);
|
|
1665
|
+
const response = { ok: true, format: 'aria', targetId, url: tabState.page.url(), snapshot: win.text, refsCount: tabState.refs.size, truncated: win.truncated, totalChars: win.totalChars, hasMore: win.hasMore, nextOffset: win.nextOffset };
|
|
1666
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1667
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1668
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1669
|
+
}
|
|
1670
|
+
return res.json(response);
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1379
1673
|
tabState.refs = await buildRefs(tabState.page);
|
|
1380
1674
|
|
|
1381
1675
|
const ariaYaml = await getAriaSnapshot(tabState.page);
|
|
@@ -1404,14 +1698,28 @@ app.get('/snapshot', async (req, res) => {
|
|
|
1404
1698
|
}).join('\n');
|
|
1405
1699
|
}
|
|
1406
1700
|
|
|
1407
|
-
|
|
1701
|
+
tabState.lastSnapshot = annotatedYaml;
|
|
1702
|
+
const win = windowSnapshot(annotatedYaml, 0);
|
|
1703
|
+
|
|
1704
|
+
const response = {
|
|
1408
1705
|
ok: true,
|
|
1409
1706
|
format: 'aria',
|
|
1410
1707
|
targetId,
|
|
1411
1708
|
url: tabState.page.url(),
|
|
1412
|
-
snapshot:
|
|
1413
|
-
refsCount: tabState.refs.size
|
|
1414
|
-
|
|
1709
|
+
snapshot: win.text,
|
|
1710
|
+
refsCount: tabState.refs.size,
|
|
1711
|
+
truncated: win.truncated,
|
|
1712
|
+
totalChars: win.totalChars,
|
|
1713
|
+
hasMore: win.hasMore,
|
|
1714
|
+
nextOffset: win.nextOffset,
|
|
1715
|
+
};
|
|
1716
|
+
|
|
1717
|
+
if (req.query.includeScreenshot === 'true') {
|
|
1718
|
+
const pngBuffer = await tabState.page.screenshot({ type: 'png' });
|
|
1719
|
+
response.screenshot = { data: pngBuffer.toString('base64'), mimeType: 'image/png' };
|
|
1720
|
+
}
|
|
1721
|
+
|
|
1722
|
+
res.json(response);
|
|
1415
1723
|
} catch (err) {
|
|
1416
1724
|
log('error', 'openclaw snapshot failed', { reqId: req.reqId, error: err.message });
|
|
1417
1725
|
res.status(500).json({ error: safeError(err) });
|
|
@@ -1584,6 +1892,32 @@ setInterval(() => {
|
|
|
1584
1892
|
});
|
|
1585
1893
|
}, 5 * 60_000);
|
|
1586
1894
|
|
|
1895
|
+
// Active health probe — detect hung browser even when isConnected() lies
|
|
1896
|
+
setInterval(async () => {
  // Nothing to probe without a browser, or while a restart is already underway.
  if (!browser || healthState.isRecovering) return;
  // Skip probe if operations are in flight
  if (healthState.activeOps > 0) {
    log('info', 'health probe skipped, operations active', { activeOps: healthState.activeOps });
    return;
  }
  // Only probe once no navigation has succeeded for at least two minutes.
  const timeSinceSuccess = Date.now() - healthState.lastSuccessfulNav;
  if (timeSinceSuccess < 120000) return;

  // Upper bound for the whole probe. Only page.goto() takes its own timeout;
  // on a hung browser newContext()/newPage()/close() may never settle, which
  // would otherwise leave the probe pending forever and never trigger a restart.
  const PROBE_TIMEOUT_MS = 15_000;
  let testContext;
  let probeTimer;
  try {
    const probe = (async () => {
      testContext = await browser.newContext();
      const page = await testContext.newPage();
      await page.goto('about:blank', { timeout: 5000 });
      await page.close();
      await testContext.close();
    })();
    const deadline = new Promise((_, reject) => {
      probeTimer = setTimeout(
        () => reject(new Error(`health probe timed out after ${PROBE_TIMEOUT_MS}ms`)),
        PROBE_TIMEOUT_MS,
      );
    });
    // Promise.race subscribes to both promises, so a late rejection of the
    // losing one does not surface as an unhandled rejection.
    await Promise.race([probe, deadline]);
    healthState.lastSuccessfulNav = Date.now();
  } catch (err) {
    log('warn', 'health probe failed', { error: err.message, timeSinceSuccessMs: timeSinceSuccess });
    // Best-effort cleanup, deliberately not awaited: closing a context on a
    // hung browser may never resolve and must not delay the restart below.
    if (testContext) testContext.close().catch(() => {});
    restartBrowser('health probe failed').catch(() => {});
  } finally {
    clearTimeout(probeTimer);
  }
}, 60_000);
|
|
1920
|
+
|
|
1587
1921
|
// Crash logging
|
|
1588
1922
|
process.on('uncaughtException', (err) => {
|
|
1589
1923
|
log('error', 'uncaughtException', { error: err.message, stack: err.stack });
|