@askjo/camofox-browser 1.3.1 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/lib/config.js +5 -4
- package/lib/cookies.js +3 -3
- package/lib/downloads.js +240 -0
- package/lib/launcher.js +3 -3
- package/lib/macros.js +1 -1
- package/lib/metrics.js +99 -0
- package/lib/proxy.js +19 -0
- package/lib/snapshot.js +1 -1
- package/lib/youtube.js +160 -51
- package/openclaw.plugin.json +1 -1
- package/package.json +10 -5
- package/plugin.ts +23 -0
- package/scripts/sync-version.js +25 -0
- package/server.js +980 -163
package/lib/youtube.js
CHANGED
|
@@ -1,27 +1,94 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* YouTube transcript extraction via yt-dlp.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* with app.post routes in the same file (triggers OpenClaw scanner).
|
|
4
|
+
* Kept in a separate module so transcript process logic stays isolated.
|
|
6
5
|
*/
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
import childProcess from 'child_process';
|
|
8
|
+
import { mkdtemp, readFile, readdir, rm } from 'fs/promises';
|
|
9
|
+
import { tmpdir } from 'os';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
|
|
12
|
+
const runProgram = childProcess.execFile;
|
|
13
|
+
|
|
14
|
+
const YT_DLP_CANDIDATES = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
|
|
15
|
+
const SAFE_ENV_KEYS = ['PATH', 'HOME', 'LANG', 'LC_ALL', 'LC_CTYPE', 'TMPDIR'];
|
|
16
|
+
const LANG_RE = /^[a-z]{2,3}(?:-[a-zA-Z0-9]{2,8})?$/;
|
|
12
17
|
|
|
13
18
|
// Detect yt-dlp binary at startup
|
|
14
19
|
let ytDlpPath = null;
|
|
15
20
|
|
|
21
|
+
function buildSafeEnv() {
|
|
22
|
+
const env = {};
|
|
23
|
+
for (const key of SAFE_ENV_KEYS) {
|
|
24
|
+
const value = process.env[key];
|
|
25
|
+
if (typeof value === 'string' && value.length > 0) {
|
|
26
|
+
env[key] = value;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return env;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
function normalizeYoutubeUrl(rawUrl) {
|
|
33
|
+
const url = String(rawUrl || '').trim();
|
|
34
|
+
if (!url) {
|
|
35
|
+
throw new Error('Missing video URL');
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
let parsed;
|
|
39
|
+
try {
|
|
40
|
+
parsed = new URL(url);
|
|
41
|
+
} catch {
|
|
42
|
+
throw new Error('Invalid video URL');
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
|
|
46
|
+
throw new Error('Unsupported URL scheme');
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const host = parsed.hostname.toLowerCase();
|
|
50
|
+
const isYoutubeHost = host === 'youtube.com' || host.endsWith('.youtube.com');
|
|
51
|
+
const isShortHost = host === 'youtu.be';
|
|
52
|
+
if (!isYoutubeHost && !isShortHost) {
|
|
53
|
+
throw new Error('Only YouTube URLs are allowed');
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return parsed.toString();
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function normalizeLanguage(rawLang) {
|
|
60
|
+
const lang = String(rawLang || 'en').trim();
|
|
61
|
+
if (!LANG_RE.test(lang)) {
|
|
62
|
+
return 'en';
|
|
63
|
+
}
|
|
64
|
+
return lang;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
async function runYtDlp(binary, args, timeoutMs) {
|
|
68
|
+
return await new Promise((resolve, reject) => {
|
|
69
|
+
runProgram(
|
|
70
|
+
binary,
|
|
71
|
+
args,
|
|
72
|
+
{
|
|
73
|
+
timeout: timeoutMs,
|
|
74
|
+
windowsHide: true,
|
|
75
|
+
env: buildSafeEnv(),
|
|
76
|
+
maxBuffer: 4 * 1024 * 1024,
|
|
77
|
+
},
|
|
78
|
+
(err, stdout = '', stderr = '') => {
|
|
79
|
+
if (err) {
|
|
80
|
+
return reject(new Error(`${err.message}\n${String(stderr).trim()}`.trim()));
|
|
81
|
+
}
|
|
82
|
+
resolve({ stdout: String(stdout), stderr: String(stderr) });
|
|
83
|
+
},
|
|
84
|
+
);
|
|
85
|
+
});
|
|
86
|
+
}
|
|
87
|
+
|
|
16
88
|
async function detectYtDlp(log) {
|
|
17
|
-
for (const candidate of
|
|
89
|
+
for (const candidate of YT_DLP_CANDIDATES) {
|
|
18
90
|
try {
|
|
19
|
-
await
|
|
20
|
-
execFile(candidate, ['--version'], { timeout: 5000 }, (err, stdout) => {
|
|
21
|
-
if (err) return reject(err);
|
|
22
|
-
resolve(stdout.trim());
|
|
23
|
-
});
|
|
24
|
-
});
|
|
91
|
+
await runYtDlp(candidate, ['--version'], 5000);
|
|
25
92
|
ytDlpPath = candidate;
|
|
26
93
|
log('info', 'yt-dlp found', { path: candidate });
|
|
27
94
|
return;
|
|
@@ -35,38 +102,49 @@ function hasYtDlp() {
|
|
|
35
102
|
}
|
|
36
103
|
|
|
37
104
|
async function ytDlpTranscript(reqId, url, videoId, lang) {
|
|
105
|
+
if (!ytDlpPath) {
|
|
106
|
+
throw new Error('yt-dlp is not available');
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const normalizedUrl = normalizeYoutubeUrl(url);
|
|
110
|
+
const normalizedLang = normalizeLanguage(lang);
|
|
38
111
|
const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
|
|
112
|
+
|
|
39
113
|
try {
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
execFile(ytDlpPath, [
|
|
114
|
+
const titleResult = await runYtDlp(
|
|
115
|
+
ytDlpPath,
|
|
116
|
+
['--skip-download', '--no-warnings', '--print', '%(title)s', normalizedUrl],
|
|
117
|
+
15000,
|
|
118
|
+
);
|
|
119
|
+
const title = titleResult.stdout.trim().split('\n')[0] || '';
|
|
120
|
+
|
|
121
|
+
await runYtDlp(
|
|
122
|
+
ytDlpPath,
|
|
123
|
+
[
|
|
51
124
|
'--skip-download',
|
|
52
|
-
'--write-sub',
|
|
53
|
-
'--sub
|
|
54
|
-
'--sub-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
125
|
+
'--write-sub',
|
|
126
|
+
'--write-auto-sub',
|
|
127
|
+
'--sub-lang',
|
|
128
|
+
normalizedLang,
|
|
129
|
+
'--sub-format',
|
|
130
|
+
'json3',
|
|
131
|
+
'-o',
|
|
132
|
+
join(tmpDir, '%(id)s'),
|
|
133
|
+
normalizedUrl,
|
|
134
|
+
],
|
|
135
|
+
30000,
|
|
136
|
+
);
|
|
62
137
|
|
|
63
138
|
const files = await readdir(tmpDir);
|
|
64
|
-
const subFile = files.find(f => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
|
|
139
|
+
const subFile = files.find((f) => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
|
|
65
140
|
if (!subFile) {
|
|
66
141
|
return {
|
|
67
|
-
status: 'error',
|
|
142
|
+
status: 'error',
|
|
143
|
+
code: 404,
|
|
68
144
|
message: 'No captions available for this video',
|
|
69
|
-
video_url:
|
|
145
|
+
video_url: normalizedUrl,
|
|
146
|
+
video_id: videoId,
|
|
147
|
+
title,
|
|
70
148
|
};
|
|
71
149
|
}
|
|
72
150
|
|
|
@@ -83,18 +161,24 @@ async function ytDlpTranscript(reqId, url, videoId, lang) {
|
|
|
83
161
|
|
|
84
162
|
if (!transcriptText || !transcriptText.trim()) {
|
|
85
163
|
return {
|
|
86
|
-
status: 'error',
|
|
164
|
+
status: 'error',
|
|
165
|
+
code: 404,
|
|
87
166
|
message: 'Subtitle file found but content was empty',
|
|
88
|
-
video_url:
|
|
167
|
+
video_url: normalizedUrl,
|
|
168
|
+
video_id: videoId,
|
|
169
|
+
title,
|
|
89
170
|
};
|
|
90
171
|
}
|
|
91
172
|
|
|
92
173
|
const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);
|
|
93
174
|
|
|
94
175
|
return {
|
|
95
|
-
status: 'ok',
|
|
96
|
-
|
|
97
|
-
|
|
176
|
+
status: 'ok',
|
|
177
|
+
transcript: transcriptText,
|
|
178
|
+
video_url: normalizedUrl,
|
|
179
|
+
video_id: videoId,
|
|
180
|
+
video_title: title,
|
|
181
|
+
language: langMatch?.[1] || normalizedLang,
|
|
98
182
|
total_words: transcriptText.split(/\s+/).length,
|
|
99
183
|
};
|
|
100
184
|
} finally {
|
|
@@ -112,7 +196,10 @@ function parseJson3(content) {
|
|
|
112
196
|
for (const event of events) {
|
|
113
197
|
const segs = event.segs || [];
|
|
114
198
|
if (!segs.length) continue;
|
|
115
|
-
const text = segs
|
|
199
|
+
const text = segs
|
|
200
|
+
.map((s) => s.utf8 || '')
|
|
201
|
+
.join('')
|
|
202
|
+
.trim();
|
|
116
203
|
if (!text) continue;
|
|
117
204
|
const tsMs = event.tStartMs || 0;
|
|
118
205
|
const tsSec = Math.floor(tsMs / 1000);
|
|
@@ -132,15 +219,31 @@ function parseVtt(content) {
|
|
|
132
219
|
let currentTimestamp = '';
|
|
133
220
|
for (const line of lines) {
|
|
134
221
|
const stripped = line.trim();
|
|
135
|
-
if (
|
|
222
|
+
if (
|
|
223
|
+
!stripped ||
|
|
224
|
+
stripped === 'WEBVTT' ||
|
|
225
|
+
stripped.startsWith('Kind:') ||
|
|
226
|
+
stripped.startsWith('Language:') ||
|
|
227
|
+
stripped.startsWith('NOTE')
|
|
228
|
+
)
|
|
229
|
+
continue;
|
|
136
230
|
if (stripped.includes(' --> ')) {
|
|
137
231
|
const parts = stripped.split(' --> ');
|
|
138
232
|
if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
|
|
139
233
|
continue;
|
|
140
234
|
}
|
|
141
|
-
const text = stripped
|
|
142
|
-
|
|
143
|
-
|
|
235
|
+
const text = stripped
|
|
236
|
+
.replace(/<[^>]+>/g, '')
|
|
237
|
+
.replace(/&amp;/g, '&')
|
|
238
|
+
.replace(/&lt;/g, '<')
|
|
239
|
+
.replace(/&gt;/g, '>')
|
|
240
|
+
.replace(/&quot;/g, '"')
|
|
241
|
+
.replace(/&#39;/g, "'")
|
|
242
|
+
.trim();
|
|
243
|
+
if (text && currentTimestamp) {
|
|
244
|
+
result.push(`[${currentTimestamp}] ${text}`);
|
|
245
|
+
currentTimestamp = '';
|
|
246
|
+
} else if (text) result.push(text);
|
|
144
247
|
}
|
|
145
248
|
return result.join('\n');
|
|
146
249
|
}
|
|
@@ -148,10 +251,16 @@ function parseVtt(content) {
|
|
|
148
251
|
function parseXml(content) {
|
|
149
252
|
const lines = [];
|
|
150
253
|
const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
|
|
151
|
-
|
|
152
|
-
while ((match = regex.exec(content)) !== null) {
|
|
254
|
+
for (const match of content.matchAll(regex)) {
|
|
153
255
|
const startSec = parseFloat(match[1]) || 0;
|
|
154
|
-
const text = match[2]
|
|
256
|
+
const text = match[2]
|
|
257
|
+
.replace(/<[^>]+>/g, '')
|
|
258
|
+
.replace(/&amp;/g, '&')
|
|
259
|
+
.replace(/&lt;/g, '<')
|
|
260
|
+
.replace(/&gt;/g, '>')
|
|
261
|
+
.replace(/&quot;/g, '"')
|
|
262
|
+
.replace(/&#39;/g, "'")
|
|
263
|
+
.trim();
|
|
155
264
|
if (!text) continue;
|
|
156
265
|
const mm = Math.floor(startSec / 60);
|
|
157
266
|
const ss = Math.floor(startSec % 60);
|
|
@@ -174,4 +283,4 @@ function formatVttTs(ts) {
|
|
|
174
283
|
return ts;
|
|
175
284
|
}
|
|
176
285
|
|
|
177
|
-
|
|
286
|
+
export { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml };
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askjo/camofox-browser",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.4.1",
|
|
4
4
|
"description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
|
|
5
|
+
"type": "module",
|
|
5
6
|
"main": "server.js",
|
|
6
7
|
"license": "MIT",
|
|
7
8
|
"author": "Jo Inc <oss@askjo.ai>",
|
|
@@ -38,6 +39,7 @@
|
|
|
38
39
|
"lib/",
|
|
39
40
|
"plugin.ts",
|
|
40
41
|
"openclaw.plugin.json",
|
|
42
|
+
"scripts/",
|
|
41
43
|
"run.sh",
|
|
42
44
|
"Dockerfile",
|
|
43
45
|
"README.md",
|
|
@@ -50,10 +52,12 @@
|
|
|
50
52
|
},
|
|
51
53
|
"scripts": {
|
|
52
54
|
"start": "node server.js",
|
|
53
|
-
"test": "jest --runInBand --forceExit",
|
|
54
|
-
"test:e2e": "jest --runInBand --forceExit tests/e2e",
|
|
55
|
-
"test:live": "RUN_LIVE_TESTS=1 jest --runInBand --forceExit tests/live",
|
|
56
|
-
"test:debug": "DEBUG_SERVER=1 jest --runInBand --forceExit",
|
|
55
|
+
"test": "NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit",
|
|
56
|
+
"test:e2e": "NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit tests/e2e",
|
|
57
|
+
"test:live": "RUN_LIVE_TESTS=1 NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit tests/live",
|
|
58
|
+
"test:debug": "DEBUG_SERVER=1 NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit",
|
|
59
|
+
"version:sync": "node scripts/sync-version.js",
|
|
60
|
+
"version": "node scripts/sync-version.js && git add openclaw.plugin.json",
|
|
57
61
|
"postinstall": "npx camoufox-js fetch || true"
|
|
58
62
|
},
|
|
59
63
|
"dependencies": {
|
|
@@ -62,6 +66,7 @@
|
|
|
62
66
|
"playwright": "^1.50.0",
|
|
63
67
|
"playwright-core": "^1.58.0",
|
|
64
68
|
"playwright-extra": "^4.3.6",
|
|
69
|
+
"prom-client": "^15.1.3",
|
|
65
70
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
66
71
|
},
|
|
67
72
|
"devDependencies": {
|
package/plugin.ts
CHANGED
|
@@ -432,6 +432,29 @@ export default function register(api: PluginApi) {
|
|
|
432
432
|
},
|
|
433
433
|
}));
|
|
434
434
|
|
|
435
|
+
api.registerTool((ctx: ToolContext) => ({
|
|
436
|
+
name: "camofox_evaluate",
|
|
437
|
+
description:
|
|
438
|
+
"Execute JavaScript in a Camoufox tab's page context. Returns the result of the expression. Use for injecting scripts, reading page state, or calling web app APIs.",
|
|
439
|
+
parameters: {
|
|
440
|
+
type: "object",
|
|
441
|
+
properties: {
|
|
442
|
+
tabId: { type: "string", description: "Tab identifier" },
|
|
443
|
+
expression: { type: "string", description: "JavaScript expression to evaluate in the page context" },
|
|
444
|
+
},
|
|
445
|
+
required: ["tabId", "expression"],
|
|
446
|
+
},
|
|
447
|
+
async execute(_id, params) {
|
|
448
|
+
const { tabId, expression } = params as { tabId: string; expression: string };
|
|
449
|
+
const userId = ctx.agentId || fallbackUserId;
|
|
450
|
+
const result = await fetchApi(baseUrl, `/tabs/${tabId}/evaluate`, {
|
|
451
|
+
method: "POST",
|
|
452
|
+
body: JSON.stringify({ userId, expression }),
|
|
453
|
+
});
|
|
454
|
+
return toToolResult(result);
|
|
455
|
+
},
|
|
456
|
+
}));
|
|
457
|
+
|
|
435
458
|
api.registerTool((ctx: ToolContext) => ({
|
|
436
459
|
name: "camofox_list_tabs",
|
|
437
460
|
description: "List all open Camoufox tabs for a user.",
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Sync openclaw.plugin.json version with package.json.
|
|
4
|
+
* Run via: npm run version:sync
|
|
5
|
+
* Auto-runs on npm version via the "version" lifecycle script.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { readFile, writeFile } from 'node:fs/promises';
|
|
9
|
+
import { fileURLToPath } from 'node:url';
|
|
10
|
+
import { dirname, join } from 'node:path';
|
|
11
|
+
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const root = join(__dirname, '..');
|
|
14
|
+
|
|
15
|
+
const pkg = JSON.parse(await readFile(join(root, 'package.json'), 'utf8'));
|
|
16
|
+
const pluginPath = join(root, 'openclaw.plugin.json');
|
|
17
|
+
const plugin = JSON.parse(await readFile(pluginPath, 'utf8'));
|
|
18
|
+
|
|
19
|
+
if (plugin.version !== pkg.version) {
|
|
20
|
+
plugin.version = pkg.version;
|
|
21
|
+
await writeFile(pluginPath, JSON.stringify(plugin, null, 2) + '\n');
|
|
22
|
+
console.log(`openclaw.plugin.json version synced to ${pkg.version}`);
|
|
23
|
+
} else {
|
|
24
|
+
console.log(`openclaw.plugin.json already at ${pkg.version}`);
|
|
25
|
+
}
|