@phi-code-admin/camofox-browser 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +571 -571
- package/Dockerfile +86 -86
- package/LICENSE +21 -21
- package/README.md +691 -691
- package/camofox.config.json +10 -10
- package/lib/auth.js +134 -134
- package/lib/camoufox-executable.js +189 -189
- package/lib/config.js +153 -153
- package/lib/cookies.js +119 -119
- package/lib/downloads.js +168 -168
- package/lib/extract.js +74 -74
- package/lib/fly.js +54 -54
- package/lib/images.js +88 -88
- package/lib/inflight.js +16 -16
- package/lib/launcher.js +47 -47
- package/lib/macros.js +31 -31
- package/lib/metrics.js +184 -184
- package/lib/openapi.js +105 -105
- package/lib/persistence.js +89 -89
- package/lib/plugins.js +178 -175
- package/lib/proxy.js +277 -277
- package/lib/reporter.js +1102 -1102
- package/lib/request-utils.js +59 -59
- package/lib/resources.js +76 -76
- package/lib/snapshot.js +41 -41
- package/lib/tmp-cleanup.js +108 -108
- package/lib/tracing.js +137 -137
- package/openclaw.plugin.json +268 -268
- package/package.json +148 -148
- package/plugin.ts +758 -758
- package/plugins/persistence/AGENTS.md +37 -37
- package/plugins/persistence/README.md +48 -48
- package/plugins/persistence/index.js +124 -124
- package/plugins/vnc/AGENTS.md +42 -42
- package/plugins/vnc/README.md +165 -165
- package/plugins/vnc/apt.txt +7 -7
- package/plugins/vnc/index.js +142 -142
- package/plugins/vnc/spawn.js +8 -8
- package/plugins/vnc/vnc-launcher.js +64 -64
- package/plugins/vnc/vnc-watcher.sh +82 -82
- package/plugins/youtube/AGENTS.md +25 -25
- package/plugins/youtube/apt.txt +1 -1
- package/plugins/youtube/index.js +206 -206
- package/plugins/youtube/post-install.sh +5 -5
- package/plugins/youtube/youtube.js +301 -301
- package/run.sh +37 -37
- package/scripts/exec.js +8 -8
- package/scripts/generate-openapi.js +24 -24
- package/scripts/install-plugin-deps.sh +63 -63
- package/scripts/plugin.js +342 -342
- package/scripts/sync-version.js +25 -25
- package/server.js +6062 -6059
- package/tsconfig.json +12 -12
|
@@ -1,301 +1,301 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* YouTube transcript extraction via yt-dlp.
|
|
3
|
-
*
|
|
4
|
-
* Kept in a separate module so transcript process logic stays isolated.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import childProcess from 'child_process';
|
|
8
|
-
import { mkdtemp, readFile, readdir, rm } from 'fs/promises';
|
|
9
|
-
import { tmpdir } from 'os';
|
|
10
|
-
import { join } from 'path';
|
|
11
|
-
|
|
12
|
-
const runProgram = childProcess.execFile;
|
|
13
|
-
|
|
14
|
-
const YT_DLP_CANDIDATES = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
|
|
15
|
-
const SAFE_ENV_KEYS = ['PATH', 'HOME', 'LANG', 'LC_ALL', 'LC_CTYPE', 'TMPDIR'];
|
|
16
|
-
const LANG_RE = /^[a-z]{2,3}(?:-[a-zA-Z0-9]{2,8})?$/;
|
|
17
|
-
|
|
18
|
-
// Detect yt-dlp binary at startup
|
|
19
|
-
let ytDlpPath = null;
|
|
20
|
-
|
|
21
|
-
function buildSafeEnv() {
|
|
22
|
-
const env = {};
|
|
23
|
-
for (const key of SAFE_ENV_KEYS) {
|
|
24
|
-
const value = process.env[key];
|
|
25
|
-
if (typeof value === 'string' && value.length > 0) {
|
|
26
|
-
env[key] = value;
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
return env;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function normalizeYoutubeUrl(rawUrl) {
|
|
33
|
-
const url = String(rawUrl || '').trim();
|
|
34
|
-
if (!url) {
|
|
35
|
-
throw new Error('Missing video URL');
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
let parsed;
|
|
39
|
-
try {
|
|
40
|
-
parsed = new URL(url);
|
|
41
|
-
} catch {
|
|
42
|
-
throw new Error('Invalid video URL');
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
|
|
46
|
-
throw new Error('Unsupported URL scheme');
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
const host = parsed.hostname.toLowerCase();
|
|
50
|
-
const isYoutubeHost = host === 'youtube.com' || host.endsWith('.youtube.com');
|
|
51
|
-
const isShortHost = host === 'youtu.be';
|
|
52
|
-
if (!isYoutubeHost && !isShortHost) {
|
|
53
|
-
throw new Error('Only YouTube URLs are allowed');
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
return parsed.toString();
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
function normalizeLanguage(rawLang) {
|
|
60
|
-
const lang = String(rawLang || 'en').trim();
|
|
61
|
-
if (!LANG_RE.test(lang)) {
|
|
62
|
-
return 'en';
|
|
63
|
-
}
|
|
64
|
-
return lang;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
async function runYtDlp(binary, args, timeoutMs) {
|
|
68
|
-
return await new Promise((resolve, reject) => {
|
|
69
|
-
runProgram(
|
|
70
|
-
binary,
|
|
71
|
-
args,
|
|
72
|
-
{
|
|
73
|
-
timeout: timeoutMs,
|
|
74
|
-
windowsHide: true,
|
|
75
|
-
env: buildSafeEnv(),
|
|
76
|
-
maxBuffer: 4 * 1024 * 1024,
|
|
77
|
-
},
|
|
78
|
-
(err, stdout = '', stderr = '') => {
|
|
79
|
-
if (err) {
|
|
80
|
-
return reject(new Error(`${err.message}\n${String(stderr).trim()}`.trim()));
|
|
81
|
-
}
|
|
82
|
-
resolve({ stdout: String(stdout), stderr: String(stderr) });
|
|
83
|
-
},
|
|
84
|
-
);
|
|
85
|
-
});
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
async function detectYtDlp(log) {
|
|
89
|
-
for (const candidate of YT_DLP_CANDIDATES) {
|
|
90
|
-
try {
|
|
91
|
-
await runYtDlp(candidate, ['--version'], 5000);
|
|
92
|
-
ytDlpPath = candidate;
|
|
93
|
-
log('info', 'yt-dlp found', { path: candidate });
|
|
94
|
-
return true;
|
|
95
|
-
} catch {}
|
|
96
|
-
}
|
|
97
|
-
log('warn', 'yt-dlp not found -- YouTube transcript endpoint will use browser fallback');
|
|
98
|
-
return false;
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
function hasYtDlp() {
|
|
102
|
-
return ytDlpPath !== null;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
/**
|
|
106
|
-
* Re-detect yt-dlp if initial startup detection failed.
|
|
107
|
-
* Called lazily before each transcript request so a transient
|
|
108
|
-
* startup failure doesn't permanently disable yt-dlp.
|
|
109
|
-
*/
|
|
110
|
-
async function ensureYtDlp(log) {
|
|
111
|
-
if (ytDlpPath) return true;
|
|
112
|
-
return await detectYtDlp(log);
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
async function ytDlpTranscript(reqId, url, videoId, lang, proxyUrl = null) {
|
|
116
|
-
if (!ytDlpPath) {
|
|
117
|
-
throw new Error('yt-dlp is not available');
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
const normalizedUrl = normalizeYoutubeUrl(url);
|
|
121
|
-
const normalizedLang = normalizeLanguage(lang);
|
|
122
|
-
const tmpDir = await mkdtemp(join(tmpdir(), 'yt-'));
|
|
123
|
-
|
|
124
|
-
// Build proxy args if a proxy URL is provided
|
|
125
|
-
const proxyArgs = proxyUrl ? ['--proxy', proxyUrl] : [];
|
|
126
|
-
|
|
127
|
-
try {
|
|
128
|
-
const titleResult = await runYtDlp(
|
|
129
|
-
ytDlpPath,
|
|
130
|
-
[...proxyArgs, '--skip-download', '--no-warnings', '--print', '%(title)s', normalizedUrl],
|
|
131
|
-
15000,
|
|
132
|
-
);
|
|
133
|
-
const title = titleResult.stdout.trim().split('\n')[0] || '';
|
|
134
|
-
|
|
135
|
-
await runYtDlp(
|
|
136
|
-
ytDlpPath,
|
|
137
|
-
[
|
|
138
|
-
...proxyArgs,
|
|
139
|
-
'--skip-download',
|
|
140
|
-
'--write-sub',
|
|
141
|
-
'--write-auto-sub',
|
|
142
|
-
'--sub-lang',
|
|
143
|
-
normalizedLang,
|
|
144
|
-
'--sub-format',
|
|
145
|
-
'json3',
|
|
146
|
-
'-o',
|
|
147
|
-
join(tmpDir, '%(id)s'),
|
|
148
|
-
normalizedUrl,
|
|
149
|
-
],
|
|
150
|
-
30000,
|
|
151
|
-
);
|
|
152
|
-
|
|
153
|
-
const files = await readdir(tmpDir);
|
|
154
|
-
const subFile = files.find((f) => f.endsWith('.json3') || f.endsWith('.vtt') || f.endsWith('.srv3'));
|
|
155
|
-
if (!subFile) {
|
|
156
|
-
return {
|
|
157
|
-
status: 'error',
|
|
158
|
-
code: 404,
|
|
159
|
-
message: 'No captions available for this video',
|
|
160
|
-
video_url: normalizedUrl,
|
|
161
|
-
video_id: videoId,
|
|
162
|
-
title,
|
|
163
|
-
};
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
const content = await readFile(join(tmpDir, subFile), 'utf8');
|
|
167
|
-
let transcriptText = null;
|
|
168
|
-
|
|
169
|
-
if (subFile.endsWith('.json3')) {
|
|
170
|
-
transcriptText = parseJson3(content);
|
|
171
|
-
} else if (subFile.endsWith('.vtt')) {
|
|
172
|
-
transcriptText = parseVtt(content);
|
|
173
|
-
} else {
|
|
174
|
-
transcriptText = parseXml(content);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
if (!transcriptText || !transcriptText.trim()) {
|
|
178
|
-
return {
|
|
179
|
-
status: 'error',
|
|
180
|
-
code: 404,
|
|
181
|
-
message: 'Subtitle file found but content was empty',
|
|
182
|
-
video_url: normalizedUrl,
|
|
183
|
-
video_id: videoId,
|
|
184
|
-
title,
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
const langMatch = subFile.match(/\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/);
|
|
189
|
-
|
|
190
|
-
return {
|
|
191
|
-
status: 'ok',
|
|
192
|
-
transcript: transcriptText,
|
|
193
|
-
video_url: normalizedUrl,
|
|
194
|
-
video_id: videoId,
|
|
195
|
-
video_title: title,
|
|
196
|
-
language: langMatch?.[1] || normalizedLang,
|
|
197
|
-
total_words: transcriptText.split(/\s+/).length,
|
|
198
|
-
};
|
|
199
|
-
} finally {
|
|
200
|
-
await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// --- Parsers ---
|
|
205
|
-
|
|
206
|
-
function parseJson3(content) {
|
|
207
|
-
try {
|
|
208
|
-
const data = JSON.parse(content);
|
|
209
|
-
const events = data.events || [];
|
|
210
|
-
const lines = [];
|
|
211
|
-
for (const event of events) {
|
|
212
|
-
const segs = event.segs || [];
|
|
213
|
-
if (!segs.length) continue;
|
|
214
|
-
const text = segs
|
|
215
|
-
.map((s) => s.utf8 || '')
|
|
216
|
-
.join('')
|
|
217
|
-
.trim();
|
|
218
|
-
if (!text) continue;
|
|
219
|
-
const tsMs = event.tStartMs || 0;
|
|
220
|
-
const tsSec = Math.floor(tsMs / 1000);
|
|
221
|
-
const mm = Math.floor(tsSec / 60);
|
|
222
|
-
const ss = tsSec % 60;
|
|
223
|
-
lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
|
|
224
|
-
}
|
|
225
|
-
return lines.join('\n');
|
|
226
|
-
} catch (e) {
|
|
227
|
-
return null;
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
function parseVtt(content) {
|
|
232
|
-
const lines = content.split('\n');
|
|
233
|
-
const result = [];
|
|
234
|
-
let currentTimestamp = '';
|
|
235
|
-
for (const line of lines) {
|
|
236
|
-
const stripped = line.trim();
|
|
237
|
-
if (
|
|
238
|
-
!stripped ||
|
|
239
|
-
stripped === 'WEBVTT' ||
|
|
240
|
-
stripped.startsWith('Kind:') ||
|
|
241
|
-
stripped.startsWith('Language:') ||
|
|
242
|
-
stripped.startsWith('NOTE')
|
|
243
|
-
)
|
|
244
|
-
continue;
|
|
245
|
-
if (stripped.includes(' --> ')) {
|
|
246
|
-
const parts = stripped.split(' --> ');
|
|
247
|
-
if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
|
|
248
|
-
continue;
|
|
249
|
-
}
|
|
250
|
-
const text = stripped
|
|
251
|
-
.replace(/<[^>]+>/g, '')
|
|
252
|
-
.replace(/&/g, '&')
|
|
253
|
-
.replace(/</g, '<')
|
|
254
|
-
.replace(/>/g, '>')
|
|
255
|
-
.replace(/"/g, '"')
|
|
256
|
-
.replace(/'/g, "'")
|
|
257
|
-
.trim();
|
|
258
|
-
if (text && currentTimestamp) {
|
|
259
|
-
result.push(`[${currentTimestamp}] ${text}`);
|
|
260
|
-
currentTimestamp = '';
|
|
261
|
-
} else if (text) result.push(text);
|
|
262
|
-
}
|
|
263
|
-
return result.join('\n');
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
function parseXml(content) {
|
|
267
|
-
const lines = [];
|
|
268
|
-
const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
|
|
269
|
-
for (const match of content.matchAll(regex)) {
|
|
270
|
-
const startSec = parseFloat(match[1]) || 0;
|
|
271
|
-
const text = match[2]
|
|
272
|
-
.replace(/<[^>]+>/g, '')
|
|
273
|
-
.replace(/&/g, '&')
|
|
274
|
-
.replace(/</g, '<')
|
|
275
|
-
.replace(/>/g, '>')
|
|
276
|
-
.replace(/"/g, '"')
|
|
277
|
-
.replace(/'/g, "'")
|
|
278
|
-
.trim();
|
|
279
|
-
if (!text) continue;
|
|
280
|
-
const mm = Math.floor(startSec / 60);
|
|
281
|
-
const ss = Math.floor(startSec % 60);
|
|
282
|
-
lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
|
|
283
|
-
}
|
|
284
|
-
return lines.join('\n');
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
function formatVttTs(ts) {
|
|
288
|
-
const parts = ts.split(':');
|
|
289
|
-
if (parts.length >= 3) {
|
|
290
|
-
const hours = parseInt(parts[0]) || 0;
|
|
291
|
-
const minutes = parseInt(parts[1]) || 0;
|
|
292
|
-
const totalMin = hours * 60 + minutes;
|
|
293
|
-
const seconds = (parts[2] || '00').split('.')[0];
|
|
294
|
-
return `${String(totalMin).padStart(2, '0')}:${seconds}`;
|
|
295
|
-
} else if (parts.length === 2) {
|
|
296
|
-
return `${String(parseInt(parts[0])).padStart(2, '0')}:${(parts[1] || '00').split('.')[0]}`;
|
|
297
|
-
}
|
|
298
|
-
return ts;
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
export { detectYtDlp, hasYtDlp, ensureYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml };
|
|
1
|
+
/**
|
|
2
|
+
* YouTube transcript extraction via yt-dlp.
|
|
3
|
+
*
|
|
4
|
+
* Kept in a separate module so transcript process logic stays isolated.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import childProcess from 'child_process';
|
|
8
|
+
import { mkdtemp, readFile, readdir, rm } from 'fs/promises';
|
|
9
|
+
import { tmpdir } from 'os';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
|
|
12
|
+
const runProgram = childProcess.execFile;
|
|
13
|
+
|
|
14
|
+
const YT_DLP_CANDIDATES = ['yt-dlp', '/usr/local/bin/yt-dlp', '/usr/bin/yt-dlp'];
|
|
15
|
+
const SAFE_ENV_KEYS = ['PATH', 'HOME', 'LANG', 'LC_ALL', 'LC_CTYPE', 'TMPDIR'];
|
|
16
|
+
const LANG_RE = /^[a-z]{2,3}(?:-[a-zA-Z0-9]{2,8})?$/;
|
|
17
|
+
|
|
18
|
+
// Path of the yt-dlp binary found by detectYtDlp(); stays null until detection succeeds
|
|
19
|
+
let ytDlpPath = null;
|
|
20
|
+
|
|
21
|
+
/**
 * Build the minimal environment handed to spawned child processes.
 *
 * Only the variables named in SAFE_ENV_KEYS are copied from process.env, and
 * only when they hold a non-empty string; everything else is left out.
 *
 * @returns {Object<string, string>} The allow-listed environment variables.
 */
function buildSafeEnv() {
  const allowedPairs = SAFE_ENV_KEYS
    .map((name) => [name, process.env[name]])
    .filter(([, val]) => typeof val === 'string' && val.length > 0);
  return Object.fromEntries(allowedPairs);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Validate a caller-supplied video URL and return its serialized form.
 *
 * @param {*} rawUrl - Candidate URL; falsy values count as missing, anything
 *   else is coerced to a string and trimmed.
 * @returns {string} The URL as serialized by the URL parser.
 * @throws {Error} When the URL is missing, cannot be parsed, is not http(s),
 *   or its host is not youtube.com, a subdomain of it, or youtu.be.
 */
function normalizeYoutubeUrl(rawUrl) {
  const candidate = String(rawUrl || '').trim();
  if (candidate === '') {
    throw new Error('Missing video URL');
  }

  const target = (() => {
    try {
      return new URL(candidate);
    } catch {
      throw new Error('Invalid video URL');
    }
  })();

  if (!['https:', 'http:'].includes(target.protocol)) {
    throw new Error('Unsupported URL scheme');
  }

  const hostname = target.hostname.toLowerCase();
  const allowedHost =
    hostname === 'youtu.be' || hostname === 'youtube.com' || hostname.endsWith('.youtube.com');
  if (!allowedHost) {
    throw new Error('Only YouTube URLs are allowed');
  }

  return target.toString();
}
|
|
58
|
+
|
|
59
|
+
/**
 * Sanitize a requested subtitle language code.
 *
 * @param {*} rawLang - Requested language; falsy values default to 'en'.
 * @returns {string} The trimmed code when it matches LANG_RE, otherwise 'en'.
 */
function normalizeLanguage(rawLang) {
  const requested = String(rawLang || 'en').trim();
  return LANG_RE.test(requested) ? requested : 'en';
}
|
|
66
|
+
|
|
67
|
+
/**
 * Run a yt-dlp binary and collect its output.
 *
 * The process is started through runProgram with the environment produced by
 * buildSafeEnv(), a timeout, and a 4 MiB cap on buffered output.
 *
 * @param {string} binary - Executable name or path.
 * @param {string[]} args - Command-line arguments.
 * @param {number} timeoutMs - Timeout passed to the process runner, in ms.
 * @returns {Promise<{stdout: string, stderr: string}>} Captured output.
 * @throws {Error} When the process fails; the message is the runner's error
 *   message followed by trimmed stderr, and the runner's error object is
 *   attached as `cause` so callers can still inspect fields such as `code`.
 */
async function runYtDlp(binary, args, timeoutMs) {
  const options = {
    timeout: timeoutMs,
    windowsHide: true,
    env: buildSafeEnv(),
    maxBuffer: 4 * 1024 * 1024,
  };

  return new Promise((resolve, reject) => {
    runProgram(binary, args, options, (err, stdout = '', stderr = '') => {
      if (err) {
        const detail = `${err.message}\n${String(stderr).trim()}`.trim();
        // Keep the underlying error reachable instead of flattening it to text.
        reject(new Error(detail, { cause: err }));
        return;
      }
      resolve({ stdout: String(stdout), stderr: String(stderr) });
    });
  });
}
|
|
87
|
+
|
|
88
|
+
/**
 * Probe each entry of YT_DLP_CANDIDATES with `--version` and remember the
 * first one that runs.
 *
 * @param {Function} log - Logger called as log(level, message, fields).
 * @returns {Promise<boolean>} true once a candidate has been stored in
 *   ytDlpPath, false when every candidate failed.
 */
async function detectYtDlp(log) {
  let index = 0;
  while (index < YT_DLP_CANDIDATES.length) {
    const binary = YT_DLP_CANDIDATES[index];
    index += 1;
    try {
      await runYtDlp(binary, ['--version'], 5000);
      ytDlpPath = binary;
      log('info', 'yt-dlp found', { path: binary });
      return true;
    } catch {
      // Any failure while handling this candidate is ignored; try the next one.
    }
  }

  log('warn', 'yt-dlp not found -- YouTube transcript endpoint will use browser fallback');
  return false;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Report whether a yt-dlp binary path has been recorded.
 *
 * @returns {boolean} true when ytDlpPath is anything other than null.
 */
function hasYtDlp() {
  const undetected = ytDlpPath === null;
  return !undetected;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Make sure a yt-dlp binary is known, probing again when none is recorded.
 *
 * Intended to be called lazily before a transcript request so that a failed
 * detection at startup does not disable yt-dlp for good.
 *
 * @param {Function} log - Logger forwarded to detectYtDlp.
 * @returns {Promise<boolean>} true when ytDlpPath is already set, otherwise
 *   the result of a fresh detectYtDlp(log) run.
 */
async function ensureYtDlp(log) {
  const alreadyDetected = Boolean(ytDlpPath);
  return alreadyDetected ? true : await detectYtDlp(log);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Fetch a video's title and subtitles with yt-dlp and turn the subtitle file
 * into a plain-text transcript.
 *
 * yt-dlp is run twice: once to print the title, once to write subtitles
 * (manual or automatic, json3 requested) into a fresh temporary directory.
 * The directory is removed again before the function settles.
 *
 * @param {*} reqId - Not read by this function.
 * @param {string} url - Video URL; validated by normalizeYoutubeUrl.
 * @param {*} videoId - Echoed back in the result as `video_id`.
 * @param {string} lang - Requested subtitle language; sanitized by normalizeLanguage.
 * @param {?string} [proxyUrl=null] - When truthy, passed to yt-dlp via `--proxy`.
 * @returns {Promise<Object>} On success `{ status: 'ok', transcript, video_url,
 *   video_id, video_title, language, total_words }`; when no subtitle file was
 *   written or it parsed to nothing, `{ status: 'error', code: 404, message,
 *   video_url, video_id, title }`.
 * @throws {Error} When no yt-dlp path is recorded, the URL is rejected, or a
 *   yt-dlp invocation fails.
 */
async function ytDlpTranscript(reqId, url, videoId, lang, proxyUrl = null) {
  if (!ytDlpPath) {
    throw new Error('yt-dlp is not available');
  }

  const videoUrl = normalizeYoutubeUrl(url);
  const subLang = normalizeLanguage(lang);
  const workDir = await mkdtemp(join(tmpdir(), 'yt-'));

  // Proxy flags are shared by both yt-dlp invocations.
  const commonArgs = proxyUrl ? ['--proxy', proxyUrl] : [];

  // Shape of the "nothing usable found" result.
  const notFound = (message, title) => ({
    status: 'error',
    code: 404,
    message,
    video_url: videoUrl,
    video_id: videoId,
    title,
  });

  try {
    const titleArgs = commonArgs.concat([
      '--skip-download',
      '--no-warnings',
      '--print',
      '%(title)s',
      videoUrl,
    ]);
    const { stdout: titleOut } = await runYtDlp(ytDlpPath, titleArgs, 15000);
    const title = titleOut.trim().split('\n')[0] || '';

    const subtitleArgs = commonArgs.concat([
      '--skip-download',
      '--write-sub',
      '--write-auto-sub',
      '--sub-lang',
      subLang,
      '--sub-format',
      'json3',
      '-o',
      join(workDir, '%(id)s'),
      videoUrl,
    ]);
    await runYtDlp(ytDlpPath, subtitleArgs, 30000);

    const subtitleExts = ['.json3', '.vtt', '.srv3'];
    const entries = await readdir(workDir);
    const subFile = entries.find((name) => subtitleExts.some((ext) => name.endsWith(ext)));
    if (!subFile) {
      return notFound('No captions available for this video', title);
    }

    const raw = await readFile(join(workDir, subFile), 'utf8');

    // Pick the parser from the file extension; anything else goes to parseXml.
    let parse = parseXml;
    if (subFile.endsWith('.json3')) {
      parse = parseJson3;
    } else if (subFile.endsWith('.vtt')) {
      parse = parseVtt;
    }
    const transcript = parse(raw);

    if (!transcript || !transcript.trim()) {
      return notFound('Subtitle file found but content was empty', title);
    }

    // Prefer the language tag embedded in the file name over the requested one.
    const detected = /\.([a-z]{2}(?:-[a-zA-Z]+)?)\.(?:json3|vtt|srv3)$/.exec(subFile);

    return {
      status: 'ok',
      transcript,
      video_url: videoUrl,
      video_id: videoId,
      video_title: title,
      language: detected?.[1] || subLang,
      total_words: transcript.split(/\s+/).length,
    };
  } finally {
    // Cleanup is best-effort: a failed removal must not mask the real outcome.
    await rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
203
|
+
|
|
204
|
+
// --- Parsers ---
|
|
205
|
+
|
|
206
|
+
/**
 * Convert a json3 subtitle document into `[MM:SS] text` lines.
 *
 * Each entry of `events` contributes one line built from the concatenated
 * `utf8` fields of its `segs`; events without segments or with blank text are
 * skipped. Minutes are not wrapped into hours.
 *
 * @param {string} content - Raw JSON text.
 * @returns {?string} Newline-joined transcript (possibly empty), or null when
 *   the content is not valid JSON or does not have the expected structure.
 */
function parseJson3(content) {
  try {
    const data = JSON.parse(content);
    const events = data.events || [];
    const lines = [];
    for (const event of events) {
      const segs = event.segs || [];
      if (!segs.length) continue;
      const text = segs
        .map((s) => s.utf8 || '')
        .join('')
        .trim();
      if (!text) continue;
      // A missing, non-numeric or negative start time is treated as 0 so the
      // line never renders as "[NaN:NaN]" or with negative fields.
      const startMs = Number(event.tStartMs);
      const tsSec = Number.isFinite(startMs) && startMs > 0 ? Math.floor(startMs / 1000) : 0;
      const mm = Math.floor(tsSec / 60);
      const ss = tsSec % 60;
      lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
    }
    return lines.join('\n');
  } catch {
    // Malformed JSON or an unexpected shape: signal "no transcript" to the caller.
    return null;
  }
}
|
|
230
|
+
|
|
231
|
+
/**
 * Convert WebVTT subtitle text into transcript lines.
 *
 * Header lines (`WEBVTT`, `Kind:`, `Language:`), lines starting with `NOTE`
 * and blank lines are dropped. A cue timing line sets the pending timestamp;
 * the first text line after it is emitted as `[MM:SS] text` and further text
 * lines of the same cue are emitted without a prefix. Markup tags are removed
 * and the basic character entities are decoded.
 *
 * @param {string} content - Raw WebVTT text.
 * @returns {string} Newline-joined transcript (empty when nothing matched).
 */
function parseVtt(content) {
  const lines = content.split('\n');
  const result = [];
  let currentTimestamp = '';
  for (const line of lines) {
    const stripped = line.trim();
    if (
      !stripped ||
      stripped === 'WEBVTT' ||
      stripped.startsWith('Kind:') ||
      stripped.startsWith('Language:') ||
      stripped.startsWith('NOTE')
    )
      continue;
    if (stripped.includes(' --> ')) {
      const parts = stripped.split(' --> ');
      if (parts[0]) currentTimestamp = formatVttTs(parts[0].trim());
      continue;
    }
    // Tags are stripped before entities are decoded so that an escaped "&lt;b&gt;"
    // in the caption text is kept as literal text. "&amp;" is handled first, in
    // the same order as the original chain of replacements.
    const text = stripped
      .replace(/<[^>]+>/g, '')
      .replace(/&amp;/g, '&')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;|&apos;/g, "'")
      .trim();
    if (text && currentTimestamp) {
      result.push(`[${currentTimestamp}] ${text}`);
      currentTimestamp = '';
    } else if (text) result.push(text);
  }
  return result.join('\n');
}
|
|
265
|
+
|
|
266
|
+
/**
 * Convert XML subtitles made of `<text start="SECONDS" ...>...</text>`
 * elements into `[MM:SS] text` lines.
 *
 * Nested tags are removed, the basic character entities are decoded, and
 * elements whose text is blank are skipped. An unparseable `start` value is
 * treated as 0. Minutes are not wrapped into hours.
 *
 * @param {string} content - Raw XML text.
 * @returns {string} Newline-joined transcript (empty when nothing matched).
 */
function parseXml(content) {
  const lines = [];
  const regex = /<text\s+start="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g;
  for (const match of content.matchAll(regex)) {
    const startSec = parseFloat(match[1]) || 0;
    // "&amp;" is decoded first, in the same order as the original chain of
    // replacements, so a doubly escaped sequence such as "&amp;#39;" ends up as
    // the plain character.
    const text = match[2]
      .replace(/<[^>]+>/g, '')
      .replace(/&amp;/g, '&')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;|&apos;/g, "'")
      .trim();
    if (!text) continue;
    const mm = Math.floor(startSec / 60);
    const ss = Math.floor(startSec % 60);
    lines.push(`[${String(mm).padStart(2, '0')}:${String(ss).padStart(2, '0')}] ${text}`);
  }
  return lines.join('\n');
}
|
|
286
|
+
|
|
287
|
+
/**
 * Shorten a WebVTT timestamp to the `MM:SS` form used in transcript lines.
 *
 * `HH:MM:SS.mmm` has its hours folded into the minutes (01:02:03.500 becomes
 * 62:03); `MM:SS.mmm` keeps its minutes. The fractional part is dropped.
 * Hour and minute fields that are not numbers count as 0. A string without
 * any colon is returned unchanged.
 *
 * @param {string} ts - Timestamp text taken from a cue timing line.
 * @returns {string} The shortened timestamp.
 */
function formatVttTs(ts) {
  const parts = ts.split(':');
  // Always parse as decimal and never let NaN leak into the output.
  const toInt = (field) => Number.parseInt(field, 10) || 0;
  const wholeSeconds = (field) => (field || '00').split('.')[0];

  if (parts.length >= 3) {
    const totalMin = toInt(parts[0]) * 60 + toInt(parts[1]);
    return `${String(totalMin).padStart(2, '0')}:${wholeSeconds(parts[2])}`;
  }
  if (parts.length === 2) {
    return `${String(toInt(parts[0])).padStart(2, '0')}:${wholeSeconds(parts[1])}`;
  }
  return ts;
}
|
|
300
|
+
|
|
301
|
+
export { detectYtDlp, hasYtDlp, ensureYtDlp, ytDlpTranscript, parseJson3, parseVtt, parseXml };
|
package/run.sh
CHANGED
|
@@ -1,37 +1,37 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# Local development script for camofox-browser
|
|
3
|
-
# Usage: ./run.sh [-p port]
|
|
4
|
-
# Example: ./run.sh -p 3001
|
|
5
|
-
|
|
6
|
-
CAMOFOX_PORT=3000
|
|
7
|
-
while getopts "p:" opt; do
|
|
8
|
-
case $opt in
|
|
9
|
-
p) CAMOFOX_PORT="$OPTARG" ;;
|
|
10
|
-
*) echo "Usage: $0 [-p port]"; exit 1 ;;
|
|
11
|
-
esac
|
|
12
|
-
done
|
|
13
|
-
export CAMOFOX_PORT
|
|
14
|
-
|
|
15
|
-
# Install deps if needed
|
|
16
|
-
if [ ! -d "node_modules" ]; then
|
|
17
|
-
echo "Installing dependencies..."
|
|
18
|
-
npm install
|
|
19
|
-
fi
|
|
20
|
-
|
|
21
|
-
# Check if camoufox browser is installed
|
|
22
|
-
if ! npx camoufox-js --version &> /dev/null 2>&1; then
|
|
23
|
-
echo "Fetching Camoufox browser..."
|
|
24
|
-
npx camoufox-js fetch
|
|
25
|
-
fi
|
|
26
|
-
|
|
27
|
-
# Install nodemon globally if not available
|
|
28
|
-
if ! command -v nodemon &> /dev/null; then
|
|
29
|
-
echo "Installing nodemon..."
|
|
30
|
-
npm install -g nodemon
|
|
31
|
-
fi
|
|
32
|
-
|
|
33
|
-
echo "Starting camofox-browser on http://localhost:$CAMOFOX_PORT (with auto-reload)"
|
|
34
|
-
echo "Logs: /tmp/camofox-browser.log"
|
|
35
|
-
nodemon --watch server.js --exec "node --max-old-space-size=128 server.js" 2>&1 | while IFS= read -r line; do
|
|
36
|
-
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $line"
|
|
37
|
-
done | tee -a /tmp/camofox-browser.log
|
|
1
|
+
#!/bin/bash
# Local development script for camofox-browser
# Usage: ./run.sh [-p port]
# Example: ./run.sh -p 3001

CAMOFOX_PORT=3000
while getopts "p:" opt; do
  case "$opt" in
    p) CAMOFOX_PORT="$OPTARG" ;;
    *) echo "Usage: $0 [-p port]"; exit 1 ;;
  esac
done
export CAMOFOX_PORT

# Install deps if needed; stop here if the install fails instead of starting
# a server that cannot load its modules
if [ ! -d "node_modules" ]; then
  echo "Installing dependencies..."
  npm install || { echo "npm install failed" >&2; exit 1; }
fi

# Check if camoufox browser is installed
if ! npx camoufox-js --version > /dev/null 2>&1; then
  echo "Fetching Camoufox browser..."
  npx camoufox-js fetch || { echo "Fetching Camoufox browser failed" >&2; exit 1; }
fi

# Install nodemon globally if not available
if ! command -v nodemon > /dev/null 2>&1; then
  echo "Installing nodemon..."
  npm install -g nodemon || { echo "Installing nodemon failed" >&2; exit 1; }
fi

echo "Starting camofox-browser on http://localhost:$CAMOFOX_PORT (with auto-reload)"
echo "Logs: /tmp/camofox-browser.log"
# Prefix every output line with a timestamp, then show it and append it to the log
nodemon --watch server.js --exec "node --max-old-space-size=128 server.js" 2>&1 | while IFS= read -r line; do
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $line"
done | tee -a /tmp/camofox-browser.log
|