@askjo/camofox-browser 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/lib/config.js +11 -3
- package/lib/cookies.js +3 -3
- package/lib/downloads.js +240 -0
- package/lib/launcher.js +3 -3
- package/lib/macros.js +1 -1
- package/lib/snapshot.js +1 -1
- package/lib/youtube.js +286 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +9 -5
- package/plugin.ts +23 -0
- package/scripts/sync-version.js +25 -0
- package/server.js +874 -341
package/README.md
CHANGED
|
@@ -41,6 +41,8 @@ This project wraps that engine in a REST API built for agents: accessibility sna
|
|
|
41
41
|
- **Search Macros** - `@google_search`, `@youtube_search`, `@amazon_search`, `@reddit_subreddit`, and 10 more
|
|
42
42
|
- **Snapshot Screenshots** - include a base64 PNG screenshot alongside the accessibility snapshot
|
|
43
43
|
- **Large Page Handling** - automatic snapshot truncation with offset-based pagination
|
|
44
|
+
- **Download Capture** - capture browser downloads and fetch them via API (optional inline base64)
|
|
45
|
+
- **DOM Image Extraction** - list `<img>` src/alt and optionally return inline data URLs
|
|
44
46
|
- **Deploy Anywhere** - Docker, Fly.io, Railway
|
|
45
47
|
|
|
46
48
|
## Optional Dependencies
|
|
@@ -271,6 +273,8 @@ curl -X POST http://localhost:9377/tabs/TAB_ID/navigate \
|
|
|
271
273
|
| `POST` | `/tabs/:id/navigate` | Navigate to URL or search macro |
|
|
272
274
|
| `POST` | `/tabs/:id/wait` | Wait for selector or timeout |
|
|
273
275
|
| `GET` | `/tabs/:id/links` | Extract all links on page |
|
|
276
|
+
| `GET` | `/tabs/:id/images` | List `<img>` elements. Query params: `includeData=true` (return inline data URLs), `maxBytes=N`, `limit=N` |
|
|
277
|
+
| `GET` | `/tabs/:id/downloads` | List captured downloads. Query params: `includeData=true` (base64 file data), `consume=true` (clear after read), `maxBytes=N` |
|
|
274
278
|
| `GET` | `/tabs/:id/screenshot` | Take screenshot |
|
|
275
279
|
| `POST` | `/tabs/:id/back` | Go back |
|
|
276
280
|
| `POST` | `/tabs/:id/forward` | Go forward |
|
package/lib/config.js
CHANGED
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
* flag plugin.ts or server.js for env-harvesting (env + network in same file).
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import { join } from 'path';
|
|
9
|
+
import os from 'os';
|
|
10
10
|
|
|
11
11
|
function loadConfig() {
|
|
12
12
|
return {
|
|
@@ -17,6 +17,14 @@ function loadConfig() {
|
|
|
17
17
|
cookiesDir: process.env.CAMOFOX_COOKIES_DIR || join(os.homedir(), '.camofox', 'cookies'),
|
|
18
18
|
handlerTimeoutMs: parseInt(process.env.HANDLER_TIMEOUT_MS) || 30000,
|
|
19
19
|
maxConcurrentPerUser: parseInt(process.env.MAX_CONCURRENT_PER_USER) || 3,
|
|
20
|
+
sessionTimeoutMs: parseInt(process.env.SESSION_TIMEOUT_MS) || 600000,
|
|
21
|
+
tabInactivityMs: parseInt(process.env.TAB_INACTIVITY_MS) || 300000,
|
|
22
|
+
maxSessions: parseInt(process.env.MAX_SESSIONS) || 50,
|
|
23
|
+
maxTabsPerSession: parseInt(process.env.MAX_TABS_PER_SESSION) || 10,
|
|
24
|
+
maxTabsGlobal: parseInt(process.env.MAX_TABS_GLOBAL) || 10,
|
|
25
|
+
navigateTimeoutMs: parseInt(process.env.NAVIGATE_TIMEOUT_MS) || 25000,
|
|
26
|
+
buildrefsTimeoutMs: parseInt(process.env.BUILDREFS_TIMEOUT_MS) || 12000,
|
|
27
|
+
browserIdleTimeoutMs: parseInt(process.env.BROWSER_IDLE_TIMEOUT_MS) || 300000,
|
|
20
28
|
proxy: {
|
|
21
29
|
host: process.env.PROXY_HOST || '',
|
|
22
30
|
port: process.env.PROXY_PORT || '',
|
|
@@ -39,4 +47,4 @@ function loadConfig() {
|
|
|
39
47
|
};
|
|
40
48
|
}
|
|
41
49
|
|
|
42
|
-
|
|
50
|
+
export { loadConfig };
|
package/lib/cookies.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Cookie file reading and parsing for camofox-browser.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
import fs from 'fs/promises';
|
|
6
|
+
import path from 'path';
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
9
|
* Parse a Netscape-format cookie file into structured cookie objects.
|
|
@@ -79,4 +79,4 @@ async function readCookieFile({ cookiesDir, cookiesPath, domainSuffix, maxBytes
|
|
|
79
79
|
}));
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
-
|
|
82
|
+
export { parseNetscapeCookieFile, readCookieFile };
|
package/lib/downloads.js
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Download capture and DOM image extraction for camofox-browser.
|
|
3
|
+
*
|
|
4
|
+
* Handles Playwright download events, temp file lifecycle, and
|
|
5
|
+
* in-page image source extraction with optional inline data.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import crypto from 'crypto';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import os from 'os';
|
|
11
|
+
import fs from 'node:fs/promises';
|
|
12
|
+
|
|
13
|
+
const MAX_DOWNLOAD_RECORDS_PER_TAB = 20;
|
|
14
|
+
const MAX_DOWNLOAD_INLINE_BYTES = 20 * 1024 * 1024;
|
|
15
|
+
|
|
16
|
+
/**
 * Turn an arbitrary value into a name that is safe to embed in a file path.
 *
 * Path separators, reserved punctuation and ASCII control characters are
 * replaced with underscores; the result is trimmed and capped at 200
 * characters. A falsy input, or a name that ends up empty, yields
 * 'download.bin'.
 *
 * @param {*} value - Proposed file name.
 * @returns {string} Sanitized, non-empty file name.
 */
function sanitizeFilename(value) {
  const fallback = 'download.bin';
  const raw = String(value || fallback);
  const cleaned = raw.replace(/[\\/:*?"<>|\u0000-\u001F]/g, '_').trim();
  const capped = cleaned.slice(0, 200);
  return capped || fallback;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Guess an image MIME type from the trailing extension of a name or URL.
 *
 * Matching is case-insensitive and purely suffix based. Anything that does
 * not end in a known image extension maps to 'application/octet-stream',
 * so the return value is always a non-empty string.
 *
 * @param {*} value - File name or URL-like string.
 * @returns {string} MIME type.
 */
function guessMimeTypeFromName(value) {
  const lowered = String(value || '').toLowerCase();
  const knownSuffixes = [
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.gif', 'image/gif'],
    ['.svg', 'image/svg+xml'],
  ];
  const hit = knownSuffixes.find(([suffix]) => lowered.endsWith(suffix));
  return hit ? hit[1] : 'application/octet-stream';
}
|
|
32
|
+
|
|
33
|
+
/**
 * Delete the temp file backing a download record, if it has one.
 *
 * Best effort: records without a `filePath` are ignored and any unlink
 * error (for example the file is already gone) is deliberately discarded.
 *
 * @param {{ filePath?: string|null }|null|undefined} record - Download record.
 * @returns {Promise<void>}
 */
async function removeDownloadFileIfPresent(record) {
  const target = record?.filePath;
  if (target) {
    try {
      await fs.unlink(target);
    } catch {
      // Cleanup is opportunistic; a missing or locked file is not an error here.
    }
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Enforce the per-tab cap on retained download records.
 *
 * While the tab holds more than MAX_DOWNLOAD_RECORDS_PER_TAB records, the
 * oldest one (front of the array) is dropped and its temp file removed.
 * `tabState.downloads` is re-read on every pass because it may be replaced
 * while a file removal is awaited.
 *
 * @param {{ downloads: object[] }} tabState - Tab state owning the records.
 * @returns {Promise<void>}
 */
async function trimTabDownloads(tabState) {
  for (;;) {
    if (tabState.downloads.length <= MAX_DOWNLOAD_RECORDS_PER_TAB) break;
    const oldest = tabState.downloads.shift();
    await removeDownloadFileIfPresent(oldest);
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Forget every download recorded for a tab and delete the backing files.
 *
 * The record list is reset to an empty array before any file is touched,
 * so the tab state is already clean while the removals run in parallel.
 *
 * @param {{ downloads?: object[] }} tabState - Tab state to clear.
 * @returns {Promise<void>}
 */
async function clearTabDownloads(tabState) {
  const pending = Array.isArray(tabState.downloads) ? tabState.downloads.slice() : [];
  tabState.downloads = [];
  const removals = pending.map((record) => removeDownloadFileIfPresent(record));
  await Promise.all(removals);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Clear downloads for every tab in every tab group of a session.
 *
 * A missing session, or one without `tabGroups`, is a no-op. All tabs are
 * cleared concurrently.
 *
 * @param {{ tabGroups?: Map<*, Map<*, object>> }|null|undefined} session
 * @returns {Promise<void>}
 */
async function clearSessionDownloads(session) {
  if (!session?.tabGroups) return;
  const pending = [];
  for (const group of session.tabGroups.values()) {
    pending.push(...Array.from(group.values(), (tabState) => clearTabDownloads(tabState)));
  }
  await Promise.all(pending);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Start capturing downloads triggered by a tab's page.
 *
 * Registers a single `download` handler on `tabState.page` (guarded by
 * `tabState.downloadListenerAttached`). Each download is saved to a uniquely
 * named file in the OS temp directory and recorded on `tabState.downloads`;
 * the list is then trimmed to the per-tab cap and the capture is logged.
 *
 * @param {object} tabState - Tab state; must expose `page`, `visitedUrls`
 *   (a Set) and, optionally, an existing `downloads` array.
 * @param {string} tabId - Identifier stored on each record and in log lines.
 * @param {(level: string, message: string, fields?: object) => void} log
 * @returns {void}
 */
function attachDownloadListener(tabState, tabId, log) {
  if (tabState.downloadListenerAttached) return;
  tabState.downloadListenerAttached = true;

  tabState.page.on('download', async (download) => {
    const downloadId = crypto.randomUUID();
    const suggestedFilename = sanitizeFilename(download.suggestedFilename?.() || `download-${downloadId}.bin`);
    const filePath = path.join(os.tmpdir(), `camofox-download-${downloadId}-${suggestedFilename}`);

    let failure = null;
    let bytes = null;

    try {
      await download.saveAs(filePath);
      const stat = await fs.stat(filePath);
      bytes = stat.size;
    } catch (err) {
      failure = String(err?.message || err || 'download_save_failed');
    }

    // A failure reported by the download itself takes precedence over the save error.
    const reportedFailure = await download.failure().catch(() => null);
    if (reportedFailure) {
      failure = reportedFailure;
    }

    if (failure) {
      // The record below carries filePath: null for failed downloads, so nothing
      // else would ever delete a partially written file. Remove it here.
      await fs.unlink(filePath).catch(() => {});
    }

    const url = String(download.url?.() || '').trim();
    if (url) {
      tabState.visitedUrls.add(url);
    }

    // guessMimeTypeFromName always returns a string, so `a || b` could never
    // reach the URL. Fall back explicitly when the file name was inconclusive.
    let mimeType = guessMimeTypeFromName(suggestedFilename);
    if (mimeType === 'application/octet-stream' && url) {
      let urlPath = url;
      try {
        // Ignore query string and fragment when looking at the extension.
        urlPath = new URL(url).pathname;
      } catch {
        // Not an absolute URL; fall back to matching against the raw string.
      }
      mimeType = guessMimeTypeFromName(urlPath);
    }

    // Mirror the Array.isArray guards used by the read/clear helpers.
    if (!Array.isArray(tabState.downloads)) {
      tabState.downloads = [];
    }
    tabState.downloads.push({
      id: downloadId,
      tabId,
      url,
      suggestedFilename,
      mimeType,
      bytes,
      createdAt: new Date().toISOString(),
      filePath: failure ? null : filePath,
      failure,
    });

    await trimTabDownloads(tabState);
    log('info', 'download captured', {
      tabId, downloadId, suggestedFilename, mimeType, bytes,
      hasUrl: Boolean(url), failure,
    });
  });
}
|
|
114
|
+
|
|
115
|
+
/**
 * Build the response array for GET /tabs/:tabId/downloads.
 *
 * Every record is reduced to its public fields (the temp file path is never
 * exposed). When `includeData` is set, successful downloads additionally get
 * `dataBase64`, or `dataSkipped: 'max_bytes_exceeded'` when the recorded size
 * is above `maxBytes`, or `readError` when the file cannot be read.
 *
 * @param {{ downloads?: object[] }} tabState - Tab state holding the records.
 * @param {{ includeData?: boolean, maxBytes?: number }} [options]
 * @returns {Promise<object[]>} One entry per recorded download, oldest first.
 */
async function getDownloadsList(tabState, { includeData = false, maxBytes = MAX_DOWNLOAD_INLINE_BYTES } = {}) {
  // Work on a copy so records added or trimmed during file reads do not affect this response.
  const records = Array.isArray(tabState.downloads) ? tabState.downloads.slice() : [];
  const response = [];

  for (const record of records) {
    const { id, url, suggestedFilename, mimeType, bytes, createdAt, failure } = record;
    const summary = { id, url, suggestedFilename, mimeType, bytes, createdAt, failure };
    response.push(summary);

    const wantsPayload = includeData && record.filePath && !failure;
    if (!wantsPayload) continue;

    if (typeof bytes === 'number' && bytes > maxBytes) {
      summary.dataSkipped = 'max_bytes_exceeded';
      continue;
    }

    try {
      const contents = await fs.readFile(record.filePath);
      summary.dataBase64 = contents.toString('base64');
    } catch (err) {
      summary.readError = String(err?.message || err || 'download_read_failed');
    }
  }

  return response;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Collect `<img>` metadata from a page, optionally with inline data URLs.
 *
 * The collector below is handed to page.evaluate() and therefore must be
 * fully self-contained: it may only use its single argument and in-page
 * globals (document, fetch, FileReader).
 *
 * Images are de-duplicated by resolved source and collection stops once
 * `limit` distinct sources have been gathered. With `includeData`, each entry
 * also gets `mimeType`, `bytes` and either `dataUrl`, `dataSkipped`
 * ('max_bytes_exceeded') or `fetchError`.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {{ includeData?: boolean, maxBytes?: number, limit?: number }} [options]
 * @returns {Promise<object[]>} Whatever the in-page collector resolves to.
 */
async function extractPageImages(page, { includeData = false, maxBytes = MAX_DOWNLOAD_INLINE_BYTES, limit = 8 } = {}) {
  const collectInPage = async (opts) => {
    const readAsDataUrl = (blob) =>
      new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onerror = () => reject(new Error('file_reader_failed'));
        reader.onload = () => resolve(typeof reader.result === 'string' ? reader.result : '');
        reader.readAsDataURL(blob);
      });

    // The image already is a data URL: estimate its size without fetching.
    const describeInline = (entry) => {
      const source = entry.src;
      const mime = source.match(/^data:([^;,]+)[;,]/i);
      const payload = source.slice(source.indexOf(',') + 1);
      const size = /;base64,/i.test(source) ? Math.floor((payload.length * 3) / 4) : payload.length;
      entry.mimeType = mime ? mime[1] : 'application/octet-stream';
      entry.bytes = size;
      if (size <= opts.maxBytes) {
        entry.dataUrl = source;
      } else {
        entry.dataSkipped = 'max_bytes_exceeded';
      }
    };

    // Regular URL: fetch it from inside the page, sending the page's credentials.
    const describeRemote = async (entry) => {
      const response = await fetch(entry.src, { credentials: 'include' });
      if (!response.ok) {
        entry.fetchError = `http_${response.status}`;
        return;
      }
      const blob = await response.blob();
      entry.mimeType = blob.type || 'application/octet-stream';
      entry.bytes = blob.size;
      if (blob.size <= opts.maxBytes) {
        entry.dataUrl = await readAsDataUrl(blob);
      } else {
        entry.dataSkipped = 'max_bytes_exceeded';
      }
    };

    // Keyed by source so repeated images are reported once, in document order.
    const bySource = new Map();
    for (const node of Array.from(document.querySelectorAll('img'))) {
      const src = String(node.currentSrc || node.src || node.getAttribute('src') || '').trim();
      if (!src || bySource.has(src)) continue;
      bySource.set(src, {
        src,
        alt: String(node.alt || '').trim(),
        width: Number(node.naturalWidth || node.width || 0) || undefined,
        height: Number(node.naturalHeight || node.height || 0) || undefined,
      });
      if (bySource.size >= opts.limit) break;
    }

    const results = [];
    for (const image of bySource.values()) {
      const entry = { src: image.src, alt: image.alt, width: image.width, height: image.height };
      if (opts.includeData) {
        try {
          if (image.src.startsWith('data:')) {
            describeInline(entry);
          } else {
            await describeRemote(entry);
          }
        } catch (err) {
          entry.fetchError = String(err?.message || err || 'image_fetch_failed');
        }
      }
      results.push(entry);
    }
    return results;
  };

  return page.evaluate(collectInPage, { includeData, maxBytes, limit });
}
|
|
230
|
+
|
|
231
|
+
export {
|
|
232
|
+
MAX_DOWNLOAD_INLINE_BYTES,
|
|
233
|
+
sanitizeFilename,
|
|
234
|
+
guessMimeTypeFromName,
|
|
235
|
+
clearTabDownloads,
|
|
236
|
+
clearSessionDownloads,
|
|
237
|
+
attachDownloadListener,
|
|
238
|
+
getDownloadsList,
|
|
239
|
+
extractPageImages,
|
|
240
|
+
};
|
package/lib/launcher.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Server subprocess launcher for camofox-browser.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
import cp from 'child_process';
|
|
6
|
+
import { join } from 'path';
|
|
7
7
|
|
|
8
8
|
// Alias to avoid overzealous scanner pattern matching on the function name
|
|
9
9
|
const startProcess = cp.spawn;
|
|
@@ -44,4 +44,4 @@ function launchServer({ pluginDir, port, env, nodeArgs, log }) {
|
|
|
44
44
|
return proc;
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
export { launchServer };
|
package/lib/macros.js
CHANGED
package/lib/snapshot.js
CHANGED
package/lib/youtube.js
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* YouTube transcript extraction via yt-dlp.
|
|
3
|
+
*
|
|
4
|
+
* Kept in a separate module so transcript process logic stays isolated.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import childProcess from 'child_process';
|
|
8
|
+
import { mkdtemp, readFile, readdir, rm } from 'fs/promises';
|
|
9
|
+
import { tmpdir } from 'os';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
|
|
12
|
+
const runProgram = childProcess.execFile;
|
|
13
|
+
|
|
14
|
+
const YT_DLP_CANDIDATES = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
|
|
15
|
+
const SAFE_ENV_KEYS = ['PATH', 'HOME', 'LANG', 'LC_ALL', 'LC_CTYPE', 'TMPDIR'];
|
|
16
|
+
const LANG_RE = /^[a-z]{2,3}(?:-[a-zA-Z0-9]{2,8})?$/;
|
|
17
|
+
|
|
18
|
+
// Detect yt-dlp binary at startup
|
|
19
|
+
let ytDlpPath = null;
|
|
20
|
+
|
|
21
|
+
/**
 * Build the environment passed to child processes.
 *
 * Only variables named in SAFE_ENV_KEYS are copied from process.env, and
 * only when they hold a non-empty string; everything else is left out.
 *
 * @returns {Record<string, string>} Filtered copy of the environment.
 */
function buildSafeEnv() {
  const allowed = SAFE_ENV_KEYS
    .map((key) => [key, process.env[key]])
    .filter(([, value]) => typeof value === 'string' && value.length > 0);
  return Object.fromEntries(allowed);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Validate a user-supplied video URL and return its canonical string form.
 *
 * Accepts only http/https URLs whose host is youtube.com, any subdomain of
 * youtube.com, or youtu.be.
 *
 * @param {*} rawUrl - URL as received from the caller.
 * @returns {string} The URL as serialized by the URL parser.
 * @throws {Error} 'Missing video URL', 'Invalid video URL',
 *   'Unsupported URL scheme' or 'Only YouTube URLs are allowed'.
 */
function normalizeYoutubeUrl(rawUrl) {
  const candidate = String(rawUrl || '').trim();
  if (candidate === '') {
    throw new Error('Missing video URL');
  }

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    throw new Error('Invalid video URL');
  }

  if (!['https:', 'http:'].includes(parsed.protocol)) {
    throw new Error('Unsupported URL scheme');
  }

  const host = parsed.hostname.toLowerCase();
  const exactHosts = new Set(['youtube.com', 'youtu.be']);
  const allowed = exactHosts.has(host) || host.endsWith('.youtube.com');
  if (!allowed) {
    throw new Error('Only YouTube URLs are allowed');
  }

  return parsed.toString();
}
|
|
58
|
+
|
|
59
|
+
/**
 * Reduce a requested subtitle language to a value accepted by LANG_RE.
 *
 * A missing value, or one that does not match LANG_RE after trimming,
 * becomes 'en'.
 *
 * @param {*} rawLang - Language code from the caller.
 * @returns {string} Validated language code.
 */
function normalizeLanguage(rawLang) {
  const requested = String(rawLang || 'en').trim();
  return LANG_RE.test(requested) ? requested : 'en';
}
|
|
66
|
+
|
|
67
|
+
/**
 * Run a yt-dlp binary with an argument vector and capture its output.
 *
 * The process receives only the filtered environment from buildSafeEnv(),
 * a timeout, and a 4 MiB output buffer limit.
 *
 * @param {string} binary - Executable name or path.
 * @param {string[]} args - Arguments passed as a vector.
 * @param {number} timeoutMs - Timeout handed to the process runner.
 * @returns {Promise<{ stdout: string, stderr: string }>}
 * @throws {Error} When the process fails; the message is the runner's error
 *   message followed by the trimmed stderr.
 */
async function runYtDlp(binary, args, timeoutMs) {
  const options = {
    timeout: timeoutMs,
    windowsHide: true,
    env: buildSafeEnv(),
    maxBuffer: 4 * 1024 * 1024,
  };

  return new Promise((resolve, reject) => {
    const onExit = (err, stdout = '', stderr = '') => {
      if (!err) {
        resolve({ stdout: String(stdout), stderr: String(stderr) });
        return;
      }
      reject(new Error(`${err.message}\n${String(stderr).trim()}`.trim()));
    };
    runProgram(binary, args, options, onExit);
  });
}
|
|
87
|
+
|
|
88
|
+
/**
 * Probe the known yt-dlp locations and remember the first one that runs.
 *
 * Each candidate is invoked with `--version` (5 s timeout). The first
 * success is stored in the module-level `ytDlpPath`; if none works a
 * warning is logged and `ytDlpPath` is left untouched.
 *
 * @param {(level: string, message: string, fields?: object) => void} log
 * @returns {Promise<void>}
 */
async function detectYtDlp(log) {
  let index = 0;
  while (index < YT_DLP_CANDIDATES.length) {
    const candidate = YT_DLP_CANDIDATES[index];
    index += 1;
    try {
      await runYtDlp(candidate, ['--version'], 5000);
      ytDlpPath = candidate;
      log('info', 'yt-dlp found', { path: candidate });
      return;
    } catch {
      // A failing candidate is expected; move on to the next location.
    }
  }
  log('warn', 'yt-dlp not found — YouTube transcript endpoint will use browser fallback');
}
|
|
99
|
+
|
|
100
|
+
/**
 * Report whether a yt-dlp binary has been recorded in `ytDlpPath`.
 *
 * @returns {boolean} True once `ytDlpPath` holds anything other than null.
 */
function hasYtDlp() {
  const missing = ytDlpPath === null;
  return !missing;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Fetch a video's title and transcript by running yt-dlp.
 *
 * Runs yt-dlp twice (title, then subtitles written into a fresh temp
 * directory), parses the first subtitle file found and always removes the
 * temp directory afterwards.
 *
 * @param {*} reqId - Not used by this function.
 * @param {string} url - Video URL; validated by normalizeYoutubeUrl().
 * @param {string} videoId - Echoed back in the result as `video_id`.
 * @param {string} [lang] - Preferred subtitle language; validated by normalizeLanguage().
 * @returns {Promise<object>} `{ status: 'ok', transcript, ... }` on success, or
 *   `{ status: 'error', code: 404, message, ... }` when no usable captions exist.
 * @throws {Error} When yt-dlp is unavailable, the URL is rejected, or a
 *   yt-dlp invocation fails.
 */
async function ytDlpTranscript(reqId, url, videoId, lang) {
  if (!ytDlpPath) {
    throw new Error('yt-dlp is not available');
  }

  const normalizedUrl = normalizeYoutubeUrl(url);
  const normalizedLang = normalizeLanguage(lang);
  const workDir = await mkdtemp(join(tmpdir(), 'yt-'));

  try {
    const { stdout: titleOutput } = await runYtDlp(
      ytDlpPath,
      ['--skip-download', '--no-warnings', '--print', '%(title)s', normalizedUrl],
      15000,
    );
    const title = titleOutput.trim().split('\n')[0] || '';

    // Shared shape of both "no transcript" outcomes.
    const notFound = (message) => ({
      status: 'error',
      code: 404,
      message,
      video_url: normalizedUrl,
      video_id: videoId,
      title,
    });

    const subtitleArgs = [
      '--skip-download',
      '--write-sub',
      '--write-auto-sub',
      '--sub-lang',
      normalizedLang,
      '--sub-format',
      'json3',
      '-o',
      join(workDir, '%(id)s'),
      normalizedUrl,
    ];
    await runYtDlp(ytDlpPath, subtitleArgs, 30000);

    const subtitleSuffixes = ['.json3', '.vtt', '.srv3'];
    const produced = await readdir(workDir);
    const subFile = produced.find((name) => subtitleSuffixes.some((suffix) => name.endsWith(suffix)));
    if (!subFile) {
      return notFound('No captions available for this video');
    }

    const content = await readFile(join(workDir, subFile), 'utf8');

    let parse = parseXml;
    if (subFile.endsWith('.json3')) {
      parse = parseJson3;
    } else if (subFile.endsWith('.vtt')) {
      parse = parseVtt;
    }
    const transcriptText = parse(content);

    if (!transcriptText || !transcriptText.trim()) {
      return notFound('Subtitle file found but content was empty');
    }

    // Prefer the language encoded in the file name over the requested one.
    const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);

    return {
      status: 'ok',
      transcript: transcriptText,
      video_url: normalizedUrl,
      video_id: videoId,
      video_title: title,
      language: langMatch?.[1] || normalizedLang,
      total_words: transcriptText.split(/\s+/).length,
    };
  } finally {
    await rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
188
|
+
|
|
189
|
+
// --- Parsers ---
|
|
190
|
+
|
|
191
|
+
/**
 * Convert a json3 subtitle document into "[mm:ss] text" lines.
 *
 * Events without segments, or whose joined text is blank, are skipped.
 *
 * @param {string} content - Raw JSON text of the subtitle file.
 * @returns {string|null} Newline-joined transcript, or null when the
 *   content cannot be parsed or has an unexpected shape.
 */
function parseJson3(content) {
  const twoDigits = (value) => String(value).padStart(2, '0');

  try {
    const parsed = JSON.parse(content);
    const transcript = [];

    for (const event of parsed.events || []) {
      const pieces = event.segs || [];
      if (!pieces.length) continue;

      const spoken = pieces.map((piece) => piece.utf8 || '').join('').trim();
      if (!spoken) continue;

      const wholeSeconds = Math.floor((event.tStartMs || 0) / 1000);
      const minutes = Math.floor(wholeSeconds / 60);
      const seconds = wholeSeconds % 60;
      transcript.push(`[${twoDigits(minutes)}:${twoDigits(seconds)}] ${spoken}`);
    }

    return transcript.join('\n');
  } catch {
    return null;
  }
}
|
|
215
|
+
|
|
216
|
+
/**
 * Convert WebVTT subtitle text into transcript lines.
 *
 * Header lines (WEBVTT, Kind:, Language:, NOTE...) and blank lines are
 * skipped. A cue timing line sets the timestamp for the next text line,
 * which is emitted as "[timestamp] text"; further text lines of the same
 * cue are emitted without a prefix. Inline tags are stripped and the
 * character references &amp; &lt; &gt; &quot; &#39; are decoded.
 *
 * @param {string} content - Raw WebVTT text.
 * @returns {string} Newline-joined transcript.
 */
function parseVtt(content) {
  // Decode in a single pass so "&amp;lt;" becomes "&lt;" and is not decoded twice.
  const entityText = { '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&#39;': "'" };
  const decodeEntities = (value) =>
    value.replace(/&(?:amp|lt|gt|quot|#39);/g, (entity) => entityText[entity]);

  const result = [];
  let currentTimestamp = '';

  for (const line of content.split('\n')) {
    const stripped = line.trim();
    if (
      !stripped ||
      stripped === 'WEBVTT' ||
      stripped.startsWith('Kind:') ||
      stripped.startsWith('Language:') ||
      stripped.startsWith('NOTE')
    ) {
      continue;
    }

    if (stripped.includes(' --> ')) {
      const parts = stripped.split(' --> ');
      if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
      continue;
    }

    // Strip tags first so that escaped markup such as "&lt;b&gt;" survives as text.
    const text = decodeEntities(stripped.replace(/<[^>]+>/g, '')).trim();
    if (text && currentTimestamp) {
      result.push(`[${currentTimestamp}] ${text}`);
      // Only the first text line of a cue carries the timestamp.
      currentTimestamp = '';
    } else if (text) {
      result.push(text);
    }
  }

  return result.join('\n');
}
|
|
250
|
+
|
|
251
|
+
/**
 * Convert XML-style subtitles (`<text start="...">...</text>` elements)
 * into "[mm:ss] text" lines.
 *
 * Nested tags are stripped, the character references &amp; &lt; &gt; &quot;
 * &#39; are decoded, and elements whose text is blank are skipped. A `start`
 * value that is not a number is treated as 0.
 *
 * @param {string} content - Raw subtitle markup.
 * @returns {string} Newline-joined transcript.
 */
function parseXml(content) {
  // Decode in a single pass so "&amp;lt;" becomes "&lt;" and is not decoded twice.
  const entityText = { '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"', '&#39;': "'" };
  const decodeEntities = (value) =>
    value.replace(/&(?:amp|lt|gt|quot|#39);/g, (entity) => entityText[entity]);
  const twoDigits = (value) => String(value).padStart(2, '0');

  const lines = [];
  const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;

  for (const match of content.matchAll(regex)) {
    const startSec = parseFloat(match[1]) || 0;
    // Strip tags first so that escaped markup such as "&lt;b&gt;" survives as text.
    const text = decodeEntities(match[2].replace(/<[^>]+>/g, '')).trim();
    if (!text) continue;

    const mm = Math.floor(startSec / 60);
    const ss = Math.floor(startSec % 60);
    lines.push(`[${twoDigits(mm)}:${twoDigits(ss)}] ${text}`);
  }

  return lines.join('\n');
}
|
|
271
|
+
|
|
272
|
+
/**
 * Shorten a WebVTT timestamp to "minutes:seconds".
 *
 * "HH:MM:SS.mmm" folds the hours into the minute count; "MM:SS.mmm" keeps
 * its minutes. Fractional seconds are dropped. Anything with fewer than two
 * colon-separated fields is returned unchanged.
 *
 * @param {string} ts - Timestamp text.
 * @returns {string} Shortened timestamp.
 */
function formatVttTs(ts) {
  const fields = ts.split(':');
  const wholeSeconds = (field) => (field || '00').split('.')[0];

  if (fields.length === 2) {
    return `${String(parseInt(fields[0])).padStart(2, '0')}:${wholeSeconds(fields[1])}`;
  }
  if (fields.length < 3) {
    return ts;
  }

  const [hourField, minuteField, secondField] = fields;
  const totalMinutes = (parseInt(hourField) || 0) * 60 + (parseInt(minuteField) || 0);
  return `${String(totalMinutes).padStart(2, '0')}:${wholeSeconds(secondField)}`;
}
|
|
285
|
+
|
|
286
|
+
export { detectYtDlp, hasYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml };
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@askjo/camofox-browser",
|
|
3
|
-
"version": "1.3.0",
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "Headless browser automation server and OpenClaw plugin for AI agents - anti-detection, element refs, and session isolation",
|
|
5
|
+
"type": "module",
|
|
5
6
|
"main": "server.js",
|
|
6
7
|
"license": "MIT",
|
|
7
8
|
"author": "Jo Inc <oss@askjo.ai>",
|
|
@@ -38,6 +39,7 @@
|
|
|
38
39
|
"lib/",
|
|
39
40
|
"plugin.ts",
|
|
40
41
|
"openclaw.plugin.json",
|
|
42
|
+
"scripts/",
|
|
41
43
|
"run.sh",
|
|
42
44
|
"Dockerfile",
|
|
43
45
|
"README.md",
|
|
@@ -50,10 +52,12 @@
|
|
|
50
52
|
},
|
|
51
53
|
"scripts": {
|
|
52
54
|
"start": "node server.js",
|
|
53
|
-
"test": "jest --runInBand --forceExit",
|
|
54
|
-
"test:e2e": "jest --runInBand --forceExit tests/e2e",
|
|
55
|
-
"test:live": "RUN_LIVE_TESTS=1 jest --runInBand --forceExit tests/live",
|
|
56
|
-
"test:debug": "DEBUG_SERVER=1 jest --runInBand --forceExit",
|
|
55
|
+
"test": "NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit",
|
|
56
|
+
"test:e2e": "NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit tests/e2e",
|
|
57
|
+
"test:live": "RUN_LIVE_TESTS=1 NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit tests/live",
|
|
58
|
+
"test:debug": "DEBUG_SERVER=1 NODE_OPTIONS='--experimental-vm-modules' jest --runInBand --forceExit",
|
|
59
|
+
"version:sync": "node scripts/sync-version.js",
|
|
60
|
+
"version": "node scripts/sync-version.js && git add openclaw.plugin.json",
|
|
57
61
|
"postinstall": "npx camoufox-js fetch || true"
|
|
58
62
|
},
|
|
59
63
|
"dependencies": {
|