@askjo/camofox-browser 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/lib/config.js +5 -4
- package/lib/cookies.js +3 -3
- package/lib/downloads.js +240 -0
- package/lib/launcher.js +3 -3
- package/lib/macros.js +1 -1
- package/lib/snapshot.js +1 -1
- package/lib/youtube.js +160 -51
- package/openclaw.plugin.json +1 -1
- package/package.json +9 -5
- package/plugin.ts +23 -0
- package/scripts/sync-version.js +25 -0
- package/server.js +857 -151
package/README.md
CHANGED
|
@@ -41,6 +41,8 @@ This project wraps that engine in a REST API built for agents: accessibility sna
|
|
|
41
41
|
- **Search Macros** - `@google_search`, `@youtube_search`, `@amazon_search`, `@reddit_subreddit`, and 10 more
|
|
42
42
|
- **Snapshot Screenshots** - include a base64 PNG screenshot alongside the accessibility snapshot
|
|
43
43
|
- **Large Page Handling** - automatic snapshot truncation with offset-based pagination
|
|
44
|
+
- **Download Capture** - capture browser downloads and fetch them via API (optional inline base64)
|
|
45
|
+
- **DOM Image Extraction** - list `<img>` src/alt and optionally return inline data URLs
|
|
44
46
|
- **Deploy Anywhere** - Docker, Fly.io, Railway
|
|
45
47
|
|
|
46
48
|
## Optional Dependencies
|
|
@@ -271,6 +273,8 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
|
|
|
271
273
|
| `POST` | `/tabs/:id/navigate` | Navigate to URL or search macro |
|
|
272
274
|
| `POST` | `/tabs/:id/wait` | Wait for selector or timeout |
|
|
273
275
|
| `GET` | `/tabs/:id/links` | Extract all links on page |
|
|
276
|
+
| `GET` | `/tabs/:id/images` | List `<img>` elements. Query params: `includeData=true` (return inline data URLs), `maxBytes=N`, `limit=N` |
|
|
277
|
+
| `GET` | `/tabs/:id/downloads` | List captured downloads. Query params: `includeData=true` (base64 file data), `consume=true` (clear after read), `maxBytes=N` |
|
|
274
278
|
| `GET` | `/tabs/:id/screenshot` | Take screenshot |
|
|
275
279
|
| `POST` | `/tabs/:id/back` | Go back |
|
|
276
280
|
| `POST` | `/tabs/:id/forward` | Go forward |
|
package/lib/config.js
CHANGED
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* flag plugin.ts or server.js for env-harvesting (env + network in same file).
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import { join } from 'path';
|
|
9
|
+
import os from 'os';
|
|
10
10
|
|
|
11
11
|
function loadConfig() {
|
|
12
12
|
return {
|
|
@@ -17,7 +17,8 @@ function loadConfig() {
|
|
|
17
17
|
cookiesDir: process.env.CAMOFOX_COOKIES_DIR || join(os.homedir(), '.camofox', 'cookies'),
|
|
18
18
|
handlerTimeoutMs: parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000,
|
|
19
19
|
maxConcurrentPerUser: parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3,
|
|
20
|
-
sessionTimeoutMs: parseInt(process.env.SESSION_TIMEOUT_MS) ||
|
|
20
|
+
sessionTimeoutMs: parseInt(process.env.SESSION_TIMEOUT_MS) || 600000,
|
|
21
|
+
tabInactivityMs: parseInt(process.env.TAB_INACTIVITY_MS) || 300000,
|
|
21
22
|
maxSessions: parseInt(process.env.MAX_SESSIONS) || 50,
|
|
22
23
|
maxTabsPerSession: parseInt(process.env.MAX_TABS_PER_SESSION) || 10,
|
|
23
24
|
maxTabsGlobal: parseInt(process.env.MAX_TABS_GLOBAL) || 10,
|
|
@@ -46,4 +47,4 @@ function loadConfig() {
|
|
|
46
47
|
};
|
|
47
48
|
}
|
|
48
49
|
|
|
49
|
-
|
|
50
|
+
export { loadConfig };
|
package/lib/cookies.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Cookie file reading and parsing for camofox-browser.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
import fs from 'fs/promises';
|
|
6
|
+
import path from 'path';
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
9
|
* Parse a Netscape-format cookie file into structured cookie objects.
|
|
@@ -79,4 +79,4 @@ async function readCookieFile({ cookiesDir, cookiesPath, domainSuffix, maxBytes
|
|
|
79
79
|
}));
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
-
|
|
82
|
+
export { parseNetscapeCookieFile, readCookieFile };
|
package/lib/downloads.js
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Download capture and DOM image extraction for camofox-browser.
|
|
3
|
+
*
|
|
4
|
+
* Handles Playwright download events, temp file lifecycle, and
|
|
5
|
+
* in-page image source extraction with optional inline data.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import crypto from 'crypto';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import os from 'os';
|
|
11
|
+
import fs from 'node:fs/promises';
|
|
12
|
+
|
|
13
|
+
const MAX_DOWNLOAD_RECORDS_PER_TAB = 20;
|
|
14
|
+
const MAX_DOWNLOAD_INLINE_BYTES = 20 * 1024 * 1024;
|
|
15
|
+
|
|
16
|
+
/**
 * Turn an arbitrary (possibly remote-supplied) filename into a safe single
 * path segment.
 *
 * Path separators, characters reserved on common filesystems and ASCII
 * control characters are replaced with `_`; surrounding whitespace is
 * trimmed; the result is capped at 200 characters. When the cap forces a
 * truncation, a short trailing extension (e.g. `.png`) is preserved so that
 * extension-based MIME detection keeps working on the sanitized name.
 *
 * @param {*} value - Candidate filename; falsy values yield `download.bin`.
 * @returns {string} A non-empty name of at most 200 characters.
 */
function sanitizeFilename(value) {
  const maxLength = 200;
  const fallback = 'download.bin';

  const cleaned = String(value || fallback)
    .replace(/[\\/:*?"<>|\u0000-\u001F]/g, '_')
    .trim();

  if (cleaned.length <= maxLength) {
    // Whitespace-only input trims down to '' and falls back to the default.
    return cleaned || fallback;
  }

  // Keep a reasonably short extension; a leading dot (hidden file) or a very
  // long "extension" is treated as part of the base name and simply cut.
  const dotIndex = cleaned.lastIndexOf('.');
  const extension = dotIndex > 0 && cleaned.length - dotIndex <= 16 ? cleaned.slice(dotIndex) : '';
  return cleaned.slice(0, maxLength - extension.length) + extension;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Guess a MIME type from the extension of a filename or URL.
 *
 * The value is matched case-insensitively against a small table of image
 * extensions. Because callers may pass a URL, a second attempt is made with
 * any `?query` / `#fragment` suffix removed; the unmodified value is always
 * tried first so plain filenames containing `#` or `?` behave as before.
 *
 * @param {*} value - Filename or URL; falsy values are treated as ''.
 * @returns {string} The matched image MIME type, or
 *   `application/octet-stream` when the extension is not recognized.
 */
function guessMimeTypeFromName(value) {
  const extensionTypes = [
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.gif', 'image/gif'],
    ['.svg', 'image/svg+xml'],
  ];
  const lookup = (name) => extensionTypes.find(([extension]) => name.endsWith(extension))?.[1] ?? null;

  const normalized = String(value || '').toLowerCase();
  const withoutSuffix = normalized.split(/[?#]/, 1)[0];

  return lookup(normalized) || lookup(withoutSuffix) || 'application/octet-stream';
}
|
|
32
|
+
|
|
33
|
+
/**
 * Best-effort removal of the temp file behind a download record.
 *
 * @param {{ filePath?: string | null } | null | undefined} record - Download
 *   record; records without a `filePath` are ignored.
 * @returns {Promise<void>} Always resolves, even when the file is already gone.
 */
async function removeDownloadFileIfPresent(record) {
  const target = record?.filePath;
  if (target) {
    try {
      await fs.unlink(target);
    } catch {
      // Deliberately ignored: cleanup is best-effort and the file may
      // already have been removed.
    }
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Evict the oldest download records of a tab until at most
 * MAX_DOWNLOAD_RECORDS_PER_TAB remain, deleting each evicted record's file.
 *
 * `tabState.downloads` is re-read on every pass because the list can change
 * while a file removal is being awaited.
 *
 * @param {{ downloads: Array<object> }} tabState - Tab state owning the list.
 * @returns {Promise<void>}
 */
async function trimTabDownloads(tabState) {
  for (;;) {
    const queue = tabState.downloads;
    if (queue.length <= MAX_DOWNLOAD_RECORDS_PER_TAB) {
      return;
    }
    const oldest = queue.shift();
    await removeDownloadFileIfPresent(oldest);
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Drop every download record of a tab and delete the associated files.
 *
 * The list is detached from the tab state first, so the tab immediately
 * reports no downloads while the file removals run in parallel.
 *
 * @param {{ downloads?: Array<object> }} tabState - Tab state to reset.
 * @returns {Promise<void>}
 */
async function clearTabDownloads(tabState) {
  const pending = Array.isArray(tabState.downloads) ? tabState.downloads.slice() : [];
  tabState.downloads = [];

  const removals = pending.map((record) => removeDownloadFileIfPresent(record));
  await Promise.all(removals);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Clear the downloads of every tab in every tab group of a session.
 *
 * @param {{ tabGroups?: Map<*, Map<*, object>> } | null | undefined} session -
 *   Session whose `tabGroups` values are themselves collections of tab states
 *   (anything exposing `.values()`); missing sessions are a no-op.
 * @returns {Promise<void>} Resolves once all tabs have been cleared.
 */
async function clearSessionDownloads(session) {
  if (!session?.tabGroups) return;

  const pending = [...session.tabGroups.values()].flatMap((group) =>
    [...group.values()].map((tabState) => clearTabDownloads(tabState)),
  );
  await Promise.all(pending);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Register (once per tab) a `download` event handler on the tab's page that
 * saves each download to a temp file and appends a record to
 * `tabState.downloads`.
 *
 * Each record carries: id, tabId, url, suggestedFilename, mimeType, bytes,
 * createdAt, filePath (null on failure) and failure (null on success).
 *
 * @param {object} tabState - Tab state; reads `page`, `downloads`,
 *   `visitedUrls` and uses `downloadListenerAttached` as the once-only guard.
 * @param {string} tabId - Identifier stored on records and in log output.
 * @param {(level: string, message: string, fields?: object) => void} log -
 *   Logger callback.
 * @returns {void}
 */
function attachDownloadListener(tabState, tabId, log) {
  if (tabState.downloadListenerAttached) return;
  tabState.downloadListenerAttached = true;

  tabState.page.on('download', async (download) => {
    const downloadId = crypto.randomUUID();
    let filePath = null;
    let recorded = false;

    // The event emitter does not await this handler, so anything thrown here
    // would surface as an unhandled rejection; catch and log instead.
    try {
      const suggestedFilename = sanitizeFilename(download.suggestedFilename?.() || `download-${downloadId}.bin`);
      filePath = path.join(os.tmpdir(), `camofox-download-${downloadId}-${suggestedFilename}`);

      let failure = null;
      let bytes = null;

      try {
        await download.saveAs(filePath);
        const stat = await fs.stat(filePath);
        bytes = stat.size;
      } catch (err) {
        failure = String(err?.message || err || 'download_save_failed');
      }

      const reportedFailure = await download.failure().catch(() => null);
      if (reportedFailure) {
        failure = reportedFailure;
      }

      // A failed record stores filePath: null, so nothing else would ever
      // delete a partially or fully written file; remove it now.
      if (failure) {
        await fs.unlink(filePath).catch(() => {});
      }

      const url = String(download.url?.() || '').trim();
      if (url) {
        tabState.visitedUrls.add(url);
      }

      // The name-based guess always returns a string, so fall back to the URL
      // only when the name produced the generic type.
      let mimeType = guessMimeTypeFromName(suggestedFilename);
      if (mimeType === 'application/octet-stream' && url) {
        let urlPath = url;
        try {
          urlPath = new URL(url).pathname;
        } catch {
          // Not an absolute URL; match against the raw string instead.
        }
        mimeType = guessMimeTypeFromName(urlPath);
      }

      tabState.downloads.push({
        id: downloadId,
        tabId,
        url,
        suggestedFilename,
        mimeType,
        bytes,
        createdAt: new Date().toISOString(),
        filePath: failure ? null : filePath,
        failure,
      });
      recorded = true;

      await trimTabDownloads(tabState);
      log('info', 'download captured', {
        tabId, downloadId, suggestedFilename, mimeType, bytes,
        hasUrl: Boolean(url), failure,
      });
    } catch (err) {
      // If the record never made it into the list, nothing owns the temp file.
      if (!recorded && filePath) {
        await fs.unlink(filePath).catch(() => {});
      }
      log('error', 'download capture failed', {
        tabId, downloadId, error: String(err?.message || err),
      });
    }
  });
}
|
|
114
|
+
|
|
115
|
+
/**
 * Build the response array for GET /tabs/:tabId/downloads.
 *
 * Each item exposes id, url, suggestedFilename, mimeType, bytes, createdAt
 * and failure; the on-disk `filePath` is never included. With `includeData`,
 * successful downloads additionally get `dataBase64`, or
 * `dataSkipped: 'max_bytes_exceeded'` when larger than the cap, or
 * `readError` when the file cannot be read.
 *
 * @param {{ downloads?: Array<object> }} tabState - Tab state holding records.
 * @param {object} [options]
 * @param {boolean} [options.includeData=false] - Inline file contents as base64.
 * @param {number} [options.maxBytes=MAX_DOWNLOAD_INLINE_BYTES] - Largest file
 *   that may be inlined. Values that are not a finite, non-negative number
 *   (e.g. NaN from a failed parse) fall back to the default instead of
 *   silently disabling the cap.
 * @returns {Promise<Array<object>>} Items in the same order as the records.
 */
async function getDownloadsList(tabState, { includeData = false, maxBytes = MAX_DOWNLOAD_INLINE_BYTES } = {}) {
  const requestedCap = Number(maxBytes);
  const inlineCap = Number.isFinite(requestedCap) && requestedCap >= 0
    ? requestedCap
    : MAX_DOWNLOAD_INLINE_BYTES;

  // Work on a copy: the list can be mutated while file reads are awaited.
  const snapshot = Array.isArray(tabState.downloads) ? [...tabState.downloads] : [];
  const downloads = [];

  // Files are read one at a time on purpose to bound peak memory usage.
  for (const entry of snapshot) {
    const item = {
      id: entry.id,
      url: entry.url,
      suggestedFilename: entry.suggestedFilename,
      mimeType: entry.mimeType,
      bytes: entry.bytes,
      createdAt: entry.createdAt,
      failure: entry.failure,
    };

    if (includeData && entry.filePath && !entry.failure) {
      if (typeof entry.bytes === 'number' && entry.bytes > inlineCap) {
        item.dataSkipped = 'max_bytes_exceeded';
      } else {
        try {
          const raw = await fs.readFile(entry.filePath);
          item.dataBase64 = raw.toString('base64');
        } catch (err) {
          item.readError = String(err?.message || err || 'download_read_failed');
        }
      }
    }

    downloads.push(item);
  }

  return downloads;
}
|
|
151
|
+
|
|
152
|
+
/**
 * In-page image extraction via page.evaluate().
 *
 * Collects up to `limit` distinct `<img>` sources from the document and
 * returns `{ src, alt, width, height }` for each. With `includeData`, each
 * entry additionally gets `mimeType` and `bytes`, plus one of:
 *   - `dataUrl`     – inline data URL when the image fits within `maxBytes`
 *   - `dataSkipped` – 'max_bytes_exceeded' when it does not
 *   - `fetchError`  – `http_<status>` or the error message when loading fails
 *
 * The callback is executed in the page, so it must stay self-contained and
 * only use the values passed through the argument object.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {object} [options]
 * @param {boolean} [options.includeData=false] - Also return image bytes.
 * @param {number} [options.maxBytes=MAX_DOWNLOAD_INLINE_BYTES] - Per-image cap.
 * @param {number} [options.limit=8] - Maximum number of images to collect.
 * @returns {Promise<Array<object>>} One entry per collected image.
 */
async function extractPageImages(page, { includeData = false, maxBytes = MAX_DOWNLOAD_INLINE_BYTES, limit = 8 } = {}) {
  return page.evaluate(
    async ({ includeData, maxBytes, limit }) => {
      const toDataUrl = (blob) =>
        new Promise((resolve, reject) => {
          const reader = new FileReader();
          reader.onload = () => resolve(typeof reader.result === 'string' ? reader.result : '');
          reader.onerror = () => reject(new Error('file_reader_failed'));
          reader.readAsDataURL(blob);
        });

      const nodes = Array.from(document.querySelectorAll('img'));
      const seen = new Set();
      const candidates = [];

      for (const node of nodes) {
        const src = String(node.currentSrc || node.src || node.getAttribute('src') || '').trim();
        if (!src || seen.has(src)) continue;
        seen.add(src);
        candidates.push({
          src,
          alt: String(node.alt || '').trim(),
          width: Number(node.naturalWidth || node.width || 0) || undefined,
          height: Number(node.naturalHeight || node.height || 0) || undefined,
        });
        if (candidates.length >= limit) break;
      }

      const results = [];
      for (const image of candidates) {
        const entry = { src: image.src, alt: image.alt, width: image.width, height: image.height };

        if (includeData) {
          try {
            if (image.src.startsWith('data:')) {
              const mimeMatch = image.src.match(/^data:([^;,]+)[;,]/i);
              const isBase64 = /;base64,/i.test(image.src);
              const payload = image.src.slice(image.src.indexOf(',') + 1);
              // Base64 encodes 3 bytes per 4 characters; otherwise use the raw length.
              const estimatedBytes = isBase64 ? Math.floor((payload.length * 3) / 4) : payload.length;
              entry.mimeType = mimeMatch ? mimeMatch[1] : 'application/octet-stream';
              entry.bytes = estimatedBytes;
              if (estimatedBytes <= maxBytes) {
                entry.dataUrl = image.src;
              } else {
                entry.dataSkipped = 'max_bytes_exceeded';
              }
            } else {
              const response = await fetch(image.src, { credentials: 'include' });
              if (response.ok) {
                // When the server declares a size above the cap, skip the
                // image without pulling the whole body into memory first.
                const declaredHeader = response.headers?.get('content-length');
                const declaredBytes = declaredHeader ? Number(declaredHeader) : NaN;
                if (Number.isFinite(declaredBytes) && declaredBytes > maxBytes) {
                  const declaredType = String(response.headers.get('content-type') || '').split(';')[0].trim();
                  entry.mimeType = declaredType || 'application/octet-stream';
                  entry.bytes = declaredBytes;
                  entry.dataSkipped = 'max_bytes_exceeded';
                  if (response.body) {
                    await response.body.cancel().catch(() => {});
                  }
                } else {
                  const blob = await response.blob();
                  entry.mimeType = blob.type || 'application/octet-stream';
                  entry.bytes = blob.size;
                  if (blob.size <= maxBytes) {
                    entry.dataUrl = await toDataUrl(blob);
                  } else {
                    entry.dataSkipped = 'max_bytes_exceeded';
                  }
                }
              } else {
                entry.fetchError = `http_${response.status}`;
              }
            }
          } catch (err) {
            entry.fetchError = String(err?.message || err || 'image_fetch_failed');
          }
        }

        results.push(entry);
      }

      return results;
    },
    { includeData, maxBytes, limit },
  );
}
|
|
230
|
+
|
|
231
|
+
export {
|
|
232
|
+
MAX_DOWNLOAD_INLINE_BYTES,
|
|
233
|
+
sanitizeFilename,
|
|
234
|
+
guessMimeTypeFromName,
|
|
235
|
+
clearTabDownloads,
|
|
236
|
+
clearSessionDownloads,
|
|
237
|
+
attachDownloadListener,
|
|
238
|
+
getDownloadsList,
|
|
239
|
+
extractPageImages,
|
|
240
|
+
};
|
package/lib/launcher.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Server subprocess launcher for camofox-browser.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
import cp from 'child_process';
|
|
6
|
+
import { join } from 'path';
|
|
7
7
|
|
|
8
8
|
// Alias to avoid overzealous scanner pattern matching on the function name
|
|
9
9
|
const startProcess = cp.spawn;
|
|
@@ -44,4 +44,4 @@ function launchServer({ pluginDir, port, env, nodeArgs, log }) {
|
|
|
44
44
|
return proc;
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
export { launchServer };
|
package/lib/macros.js
CHANGED
package/lib/snapshot.js
CHANGED
package/lib/youtube.js
CHANGED
|
@@ -1,27 +1,94 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* YouTube transcript extraction via yt-dlp.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* with app.post routes in the same file (triggers OpenClaw scanner).
|
|
4
|
+
* Kept in a separate module so transcript process logic stays isolated.
|
|
6
5
|
*/
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
import childProcess from 'child_process';
|
|
8
|
+
import { mkdtemp, readFile, readdir, rm } from 'fs/promises';
|
|
9
|
+
import { tmpdir } from 'os';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
|
|
12
|
+
const runProgram = childProcess.execFile;
|
|
13
|
+
|
|
14
|
+
const YT_DLP_CANDIDATES = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
|
|
15
|
+
const SAFE_ENV_KEYS = ['PATH', 'HOME', 'LANG', 'LC_ALL', 'LC_CTYPE', 'TMPDIR'];
|
|
16
|
+
const LANG_RE = /^[a-z]{2,3}(?:-[a-zA-Z0-9]{2,8})?$/;
|
|
12
17
|
|
|
13
18
|
// Detect yt-dlp binary at startup
|
|
14
19
|
let ytDlpPath = null;
|
|
15
20
|
|
|
21
|
+
/**
 * Build the environment handed to child processes: only the variables named
 * in SAFE_ENV_KEYS are copied from `process.env`, and only when they hold a
 * non-empty string.
 *
 * @returns {Record<string, string>} A fresh object with the allowed variables.
 */
function buildSafeEnv() {
  const allowedEntries = SAFE_ENV_KEYS
    .map((key) => [key, process.env[key]])
    .filter(([, value]) => typeof value === 'string' && value.length > 0);
  return Object.fromEntries(allowedEntries);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Validate a user-supplied video URL and return its normalized string form.
 *
 * Accepts only http(s) URLs whose host is `youtube.com`, any subdomain of
 * `youtube.com`, or `youtu.be`.
 *
 * @param {*} rawUrl - Candidate URL; surrounding whitespace is ignored.
 * @returns {string} The URL as serialized by the WHATWG URL parser.
 * @throws {Error} 'Missing video URL', 'Invalid video URL',
 *   'Unsupported URL scheme' or 'Only YouTube URLs are allowed'.
 */
function normalizeYoutubeUrl(rawUrl) {
  const candidate = String(rawUrl || '').trim();
  if (candidate === '') {
    throw new Error('Missing video URL');
  }

  const parseOrFail = (text) => {
    try {
      return new URL(text);
    } catch {
      throw new Error('Invalid video URL');
    }
  };
  const parsed = parseOrFail(candidate);

  if (!['https:', 'http:'].includes(parsed.protocol)) {
    throw new Error('Unsupported URL scheme');
  }

  const host = parsed.hostname.toLowerCase();
  const allowedHost = host === 'youtu.be' || host === 'youtube.com' || host.endsWith('.youtube.com');
  if (!allowedHost) {
    throw new Error('Only YouTube URLs are allowed');
  }

  return parsed.href;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Normalize a requested subtitle language code.
 *
 * @param {*} rawLang - Requested language; falsy values default to 'en'.
 * @returns {string} The trimmed code when it matches LANG_RE, otherwise 'en'.
 */
function normalizeLanguage(rawLang) {
  const candidate = String(rawLang || 'en').trim();
  return LANG_RE.test(candidate) ? candidate : 'en';
}
|
|
66
|
+
|
|
67
|
+
/**
 * Run a yt-dlp binary through the `runProgram` alias and collect its output.
 *
 * The child is started with the environment from buildSafeEnv(), a 4 MiB
 * output buffer limit and the given timeout.
 *
 * @param {string} binary - Executable name or path.
 * @param {string[]} args - Argument vector passed to the executable.
 * @param {number} timeoutMs - Timeout forwarded as the `timeout` option.
 * @returns {Promise<{ stdout: string, stderr: string }>} Captured output.
 * @throws {Error} When the process reports an error; the message is the
 *   original error message followed by the trimmed stderr.
 */
async function runYtDlp(binary, args, timeoutMs) {
  return await new Promise((resolve, reject) => {
    const options = {
      timeout: timeoutMs,
      windowsHide: true,
      env: buildSafeEnv(),
      maxBuffer: 4 * 1024 * 1024,
    };

    const onFinished = (err, stdout = '', stderr = '') => {
      if (!err) {
        resolve({ stdout: String(stdout), stderr: String(stderr) });
        return;
      }
      const detail = `${err.message}\n${String(stderr).trim()}`.trim();
      reject(new Error(detail));
    };

    runProgram(binary, args, options, onFinished);
  });
}
|
|
87
|
+
|
|
16
88
|
async function detectYtDlp(log) {
|
|
17
|
-
for (const candidate of
|
|
89
|
+
for (const candidate of YT_DLP_CANDIDATES) {
|
|
18
90
|
try {
|
|
19
|
-
await
|
|
20
|
-
execFile(candidate, ['--version'], { timeout: 5000 }, (err, stdout) => {
|
|
21
|
-
if (err) return reject(err);
|
|
22
|
-
resolve(stdout.trim());
|
|
23
|
-
});
|
|
24
|
-
});
|
|
91
|
+
await runYtDlp(candidate, ['--version'], 5000);
|
|
25
92
|
ytDlpPath = candidate;
|
|
26
93
|
log('info', 'yt-dlp found', { path: candidate });
|
|
27
94
|
return;
|
|
@@ -35,38 +102,49 @@ function hasYtDlp() {
|
|
|
35
102
|
}
|
|
36
103
|
|
|
37
104
|
async function ytDlpTranscript(reqId, url, videoId, lang) {
|
|
105
|
+
if (!ytDlpPath) {
|
|
106
|
+
throw new Error('yt-dlp is not available');
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
const normalizedUrl = normalizeYoutubeUrl(url);
|
|
110
|
+
const normalizedLang = normalizeLanguage(lang);
|
|
38
111
|
const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
|
|
112
|
+
|
|
39
113
|
try {
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
execFile(ytDlpPath, [
|
|
114
|
+
const titleResult = await runYtDlp(
|
|
115
|
+
ytDlpPath,
|
|
116
|
+
['--skip-download', '--no-warnings', '--print', '%(title)s', normalizedUrl],
|
|
117
|
+
15000,
|
|
118
|
+
);
|
|
119
|
+
const title = titleResult.stdout.trim().split('\n')[0] || '';
|
|
120
|
+
|
|
121
|
+
await runYtDlp(
|
|
122
|
+
ytDlpPath,
|
|
123
|
+
[
|
|
51
124
|
'--skip-download',
|
|
52
|
-
'--write-sub',
|
|
53
|
-
'--sub
|
|
54
|
-
'--sub-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
125
|
+
'--write-sub',
|
|
126
|
+
'--write-auto-sub',
|
|
127
|
+
'--sub-lang',
|
|
128
|
+
normalizedLang,
|
|
129
|
+
'--sub-format',
|
|
130
|
+
'json3',
|
|
131
|
+
'-o',
|
|
132
|
+
join(tmpDir, '%(id)s'),
|
|
133
|
+
normalizedUrl,
|
|
134
|
+
],
|
|
135
|
+
30000,
|
|
136
|
+
);
|
|
62
137
|
|
|
63
138
|
const files = await readdir(tmpDir);
|
|
64
|
-
const subFile = files.find(f => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
|
|
139
|
+
const subFile = files.find((f) => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
|
|
65
140
|
if (!subFile) {
|
|
66
141
|
return {
|
|
67
|
-
status: 'error',
|
|
142
|
+
status: 'error',
|
|
143
|
+
code: 404,
|
|
68
144
|
message: 'No captions available for this video',
|
|
69
|
-
video_url:
|
|
145
|
+
video_url: normalizedUrl,
|
|
146
|
+
video_id: videoId,
|
|
147
|
+
title,
|
|
70
148
|
};
|
|
71
149
|
}
|
|
72
150
|
|
|
@@ -83,18 +161,24 @@ async function ytDlpTranscript(reqId, url, videoId, lang) {
|
|
|
83
161
|
|
|
84
162
|
if (!transcriptText || !transcriptText.trim()) {
|
|
85
163
|
return {
|
|
86
|
-
status: 'error',
|
|
164
|
+
status: 'error',
|
|
165
|
+
code: 404,
|
|
87
166
|
message: 'Subtitle file found but content was empty',
|
|
88
|
-
video_url:
|
|
167
|
+
video_url: normalizedUrl,
|
|
168
|
+
video_id: videoId,
|
|
169
|
+
title,
|
|
89
170
|
};
|
|
90
171
|
}
|
|
91
172
|
|
|
92
173
|
const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);
|
|
93
174
|
|
|
94
175
|
return {
|
|
95
|
-
status: 'ok',
|
|
96
|
-
|
|
97
|
-
|
|
176
|
+
status: 'ok',
|
|
177
|
+
transcript: transcriptText,
|
|
178
|
+
video_url: normalizedUrl,
|
|
179
|
+
video_id: videoId,
|
|
180
|
+
video_title: title,
|
|
181
|
+
language: langMatch?.[1] || normalizedLang,
|
|
98
182
|
total_words: transcriptText.split(/\s+/).length,
|
|
99
183
|
};
|
|
100
184
|
} finally {
|
|
@@ -112,7 +196,10 @@ function parseJson3(content) {
|
|
|
112
196
|
for (const event of events) {
|
|
113
197
|
const segs = event.segs || [];
|
|
114
198
|
if (!segs.length) continue;
|
|
115
|
-
const text = segs
|
|
199
|
+
const text = segs
|
|
200
|
+
.map((s) => s.utf8 || '')
|
|
201
|
+
.join('')
|
|
202
|
+
.trim();
|
|
116
203
|
if (!text) continue;
|
|
117
204
|
const tsMs = event.tStartMs || 0;
|
|
118
205
|
const tsSec = Math.floor(tsMs / 1000);
|
|
@@ -132,15 +219,31 @@ function parseVtt(content) {
|
|
|
132
219
|
let currentTimestamp = '';
|
|
133
220
|
for (const line of lines) {
|
|
134
221
|
const stripped = line.trim();
|
|
135
|
-
if (
|
|
222
|
+
if (
|
|
223
|
+
!stripped ||
|
|
224
|
+
stripped === 'WEBVTT' ||
|
|
225
|
+
stripped.startsWith('Kind:') ||
|
|
226
|
+
stripped.startsWith('Language:') ||
|
|
227
|
+
stripped.startsWith('NOTE')
|
|
228
|
+
)
|
|
229
|
+
continue;
|
|
136
230
|
if (stripped.includes(' --> ')) {
|
|
137
231
|
const parts = stripped.split(' --> ');
|
|
138
232
|
if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
|
|
139
233
|
continue;
|
|
140
234
|
}
|
|
141
|
-
const text = stripped
|
|
142
|
-
|
|
143
|
-
|
|
235
|
+
const text = stripped
|
|
236
|
+
.replace(/<[^>]+>/g, '')
|
|
237
|
+
.replace(/&amp;/g, '&')
|
|
238
|
+
.replace(/&lt;/g, '<')
|
|
239
|
+
.replace(/&gt;/g, '>')
|
|
240
|
+
.replace(/&quot;/g, '"')
|
|
241
|
+
.replace(/&#39;/g, "'")
|
|
242
|
+
.trim();
|
|
243
|
+
if (text && currentTimestamp) {
|
|
244
|
+
result.push(`[${currentTimestamp}] ${text}`);
|
|
245
|
+
currentTimestamp = '';
|
|
246
|
+
} else if (text) result.push(text);
|
|
144
247
|
}
|
|
145
248
|
return result.join('\n');
|
|
146
249
|
}
|
|
@@ -148,10 +251,16 @@ function parseVtt(content) {
|
|
|
148
251
|
function parseXml(content) {
|
|
149
252
|
const lines = [];
|
|
150
253
|
const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
|
|
151
|
-
|
|
152
|
-
while ((match = regex.exec(content)) !== null) {
|
|
254
|
+
for (const match of content.matchAll(regex)) {
|
|
153
255
|
const startSec = parseFloat(match[1]) || 0;
|
|
154
|
-
const text = match[2]
|
|
256
|
+
const text = match[2]
|
|
257
|
+
.replace(/<[^>]+>/g, '')
|
|
258
|
+
.replace(/&amp;/g, '&')
|
|
259
|
+
.replace(/&lt;/g, '<')
|
|
260
|
+
.replace(/&gt;/g, '>')
|
|
261
|
+
.replace(/&quot;/g, '"')
|
|
262
|
+
.replace(/&#39;/g, "'")
|
|
263
|
+
.trim();
|
|
155
264
|
if (!text) continue;
|
|
156
265
|
const mm = Math.floor(startSec / 60);
|
|
157
266
|
const ss = Math.floor(startSec % 60);
|
|
@@ -174,4 +283,4 @@ function formatVttTs(ts) {
|
|
|
174
283
|
return ts;
|
|
175
284
|
}
|
|
176
285
|
|
|
177
|
-
|
|
286
|
+
export { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml };
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askjo/camofox-browser",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
|
|
5
|
+
"type": "module",
|
|
5
6
|
"main": "server.js",
|
|
6
7
|
"license": "MIT",
|
|
7
8
|
"author": "Jo Inc <oss@askjo.ai>",
|
|
@@ -38,6 +39,7 @@
|
|
|
38
39
|
"lib/",
|
|
39
40
|
"plugin.ts",
|
|
40
41
|
"openclaw.plugin.json",
|
|
42
|
+
"scripts/",
|
|
41
43
|
"run.sh",
|
|
42
44
|
"Dockerfile",
|
|
43
45
|
"README.md",
|
|
@@ -50,10 +52,12 @@
|
|
|
50
52
|
},
|
|
51
53
|
"scripts": {
|
|
52
54
|
"start": "node server.js",
|
|
53
|
-
"test": "jest --runInBand --forceExit",
|
|
54
|
-
"test:e2e": "jest --runInBand --forceExit tests/e2e",
|
|
55
|
-
"test:live": "RUN_LIVE_TESTS=1 jest --runInBand --forceExit tests/live",
|
|
56
|
-
"test:debug": "DEBUG_SERVER=1 jest --runInBand --forceExit",
|
|
55
|
+
"test": "NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit",
|
|
56
|
+
"test:e2e": "NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit tests/e2e",
|
|
57
|
+
"test:live": "RUN_LIVE_TESTS=1 NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit tests/live",
|
|
58
|
+
"test:debug": "DEBUG_SERVER=1 NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit",
|
|
59
|
+
"version:sync": "node scripts/sync-version.js",
|
|
60
|
+
"version": "node scripts/sync-version.js && git add openclaw.plugin.json",
|
|
57
61
|
"postinstall": "npx camoufox-js fetch || true"
|
|
58
62
|
},
|
|
59
63
|
"dependencies": {
|