tuna-agent 0.1.155 → 0.1.157
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -116,6 +116,85 @@ function run(cmd, args, opts = {}) {
|
|
|
116
116
|
p.on('close', (code) => code === 0 ? resolve({ out, err }) : reject(new Error(`${cmd} exit ${code}: ${err.slice(0, 500)}`)));
|
|
117
117
|
});
|
|
118
118
|
}
|
|
119
|
+
// Modern Chrome UA — TikTok/Douyin reject unknown user-agents.
|
|
120
|
+
// Desktop-Chrome user-agent string handed to yt-dlp via `--user-agent`.
// NOTE(review): the Chrome version is hard-coded — presumably it needs a
// periodic refresh to stay "modern"; confirm with the maintainers.
const SRC_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36';
|
|
121
|
+
// Optional cookies for auth-gated sources (Facebook private/page videos,
|
|
122
|
+
// some Douyin). YT_DLP_COOKIES = path to a Netscape cookies.txt; or
|
|
123
|
+
// YT_DLP_COOKIES_FROM_BROWSER = a browser name yt-dlp can read cookies from.
|
|
124
|
+
/**
 * Build the yt-dlp cookie flags from the environment.
 *
 * `YT_DLP_COOKIES` (path to a cookies file) takes precedence over
 * `YT_DLP_COOKIES_FROM_BROWSER` (browser name). Values are trimmed first so
 * that a whitespace-only or newline-padded variable (common with `.env`
 * files) is treated as unset instead of being forwarded as a bogus argument.
 *
 * @returns {string[]} Arguments to splice into a yt-dlp command line; an
 *   empty array when neither variable carries a usable value.
 */
function cookieArgs() {
    const cookieFile = (process.env.YT_DLP_COOKIES ?? '').trim();
    if (cookieFile) {
        return ['--cookies', cookieFile];
    }
    const browser = (process.env.YT_DLP_COOKIES_FROM_BROWSER ?? '').trim();
    if (browser) {
        return ['--cookies-from-browser', browser];
    }
    return [];
}
|
|
131
|
+
/**
 * Classify a source URL by the site that hosts it.
 *
 * Matching is done on whole DNS labels: a host counts as belonging to a
 * platform only when it equals one of the platform's domains or is a
 * subdomain of it. Look-alike hosts such as `notfacebook.com` or
 * `fakedouyin.com` therefore fall through to `'other'`.
 *
 * @param {string} url - Source video URL. Malformed or non-string input is
 *   tolerated and classified as `'other'`.
 * @returns {'youtube'|'tiktok'|'douyin'|'facebook'|'other'} Platform key.
 */
function detectPlatform(url) {
    let host = '';
    try {
        // Drop a trailing root dot ("www.youtube.com.") so FQDN forms match too.
        host = new URL(url).hostname.toLowerCase().replace(/\.$/, '');
    }
    catch { /* malformed → 'other' */ }

    // True when `host` is one of `domains` or a subdomain of one of them.
    const isOn = (...domains) => domains.some((d) => host === d || host.endsWith(`.${d}`));

    if (isOn('youtube.com', 'youtu.be')) {
        return 'youtube';
    }
    if (isOn('tiktok.com')) {
        return 'tiktok';
    }
    if (isOn('douyin.com', 'iesdouyin.com')) {
        return 'douyin';
    }
    if (isOn('facebook.com', 'fb.watch', 'fb.com')) {
        return 'facebook';
    }
    return 'other';
}
|
|
147
|
+
// yt-dlp `%(title)s` is a clean title on YouTube but the full post caption
|
|
148
|
+
// on TikTok/Douyin/Facebook (often 200+ chars, prefixed with engagement
|
|
149
|
+
// stats). Strip the social-stats prefix, keep the first segment, and cap
|
|
150
|
+
// to a sane title length so the cloned idea doesn't get a paragraph title.
|
|
151
|
+
/**
 * Reduce a raw yt-dlp `%(title)s` value to a short, human-friendly title.
 *
 * Steps: keep only the first line, strip a leading run of engagement stats
 * ("1K views · 51 reactions | "), keep the first `|`/`·`-separated segment
 * when it is at least 8 characters long, then cap the result at 90 UTF-16
 * units plus an ellipsis.
 *
 * The cap prefers a word boundary but never at the cost of most of the
 * title, and never splits a surrogate pair (emoji are common in captions).
 *
 * @param {string|null|undefined} raw - Title text as printed by yt-dlp.
 * @returns {string} Cleaned title; may be empty when nothing usable remains.
 */
function cleanSourceTitle(raw) {
    const MAX_LEN = 90;
    // A word-boundary cut must keep at least this much of the capped text.
    const MIN_WORD_CUT = 45;

    let t = String(raw || '').trim().split('\n')[0].trim();
    // Drop leading "1K views · 51 reactions | " / "230 likes, 12 comments - " noise.
    t = t.replace(/^\s*(?:[\d.,]+\s*[KMB]?\s*(?:views?|likes?|reactions?|comments?|shares?|followers?)\b\s*[·,|–\-:]*\s*)+/i, '').trim();
    // FB/TikTok pack "title | extra | hashtags" — keep the first real segment.
    const seg = t.split(/\s*[|·]\s*/)[0].trim();
    if (seg.length >= 8) {
        t = seg;
    }
    if (t.length <= MAX_LEN) {
        return t;
    }

    let cut = MAX_LEN;
    const last = t.charCodeAt(cut - 1);
    const next = t.charCodeAt(cut);
    // Back off one unit when the cut would land inside a surrogate pair.
    if (last >= 0xD800 && last <= 0xDBFF && next >= 0xDC00 && next <= 0xDFFF) {
        cut -= 1;
    }
    let head = t.slice(0, cut);
    // If the cut already sits on whitespace the last word is complete; keep it.
    if (!/\s/.test(t.charAt(cut))) {
        const atWord = head.replace(/\s+\S*$/, '');
        // Text with little or early whitespace (e.g. CJK captions) would be
        // cut down to almost nothing — fall back to the hard cut instead.
        if (atWord.length >= MIN_WORD_CUT) {
            head = atWord;
        }
    }
    return `${head.trimEnd()}…`;
}
|
|
163
|
+
// Download a source video across YouTube / TikTok / Douyin / Facebook.
|
|
164
|
+
// yt-dlp supports all of them, but a single rigid `-f` that works for
|
|
165
|
+
// YouTube fails on the others, so try a tolerant 720p-capped format then
|
|
166
|
+
// fall back to letting yt-dlp pick. UA + optional cookies harden the
|
|
167
|
+
// non-YouTube extractors (FB private/page + Douyin need cookies).
|
|
168
|
+
/**
 * Download a source video to `dest` with yt-dlp, trying progressively more
 * tolerant format selectors until one succeeds.
 *
 * Each attempt is logged with `console.warn` on failure. When every attempt
 * fails, the thrown error names the detected platform, carries a cookie hint
 * for Facebook/Douyin, and exposes the last underlying failure as `cause`.
 *
 * @param {string} url - Source video URL. It is passed after a `--`
 *   terminator so a value starting with `-` can never be read by yt-dlp as
 *   an option.
 * @param {string} dest - Output path handed to yt-dlp's `-o`; the container
 *   is forced to mp4 so the file lands at exactly this path.
 * @returns {Promise<void>} Resolves once one attempt succeeds.
 * @throws {Error} When all attempts fail.
 */
async function downloadSourceVideo(url, dest) {
    const platform = detectPlatform(url);
    const common = [
        '--no-playlist', '--no-warnings', '--retries', '3', '--fragment-retries', '3',
        // dest ends in .mp4 — force merged/odd containers to mp4 so the file
        // lands EXACTLY at `dest` (else fs.rename → ENOENT, e.g. YouTube shorts
        // where bv*+ba merges to .mkv/.webm).
        '--merge-output-format', 'mp4', '--remux-video', 'mp4',
        '--user-agent', SRC_UA, ...cookieArgs(), '-o', dest,
        // End of options: everything after `--` is treated as a URL.
        '--', url,
    ];
    const attempts = [
        // Preferred: 720p-capped video+audio merge, else capped single file.
        ['-f', 'bv*[height<=720]+ba/b[height<=720]/best', ...common],
        // Fallback: single pre-muxed file only (TikTok/Douyin/FB quirks).
        ['-f', 'best/mp4', ...common],
    ];
    let lastErr;
    for (const [i, args] of attempts.entries()) {
        try {
            await run(YT_DLP, args);
            return;
        }
        catch (e) {
            lastErr = e;
            console.warn(`[analyze_video] yt-dlp attempt ${i + 1}/${attempts.length} failed (${platform}): ${String(e?.message || e).slice(0, 220)}`);
        }
    }
    const hint = (platform === 'facebook' || platform === 'douyin')
        ? ' — FB private/page & some Douyin need cookies (set YT_DLP_COOKIES)'
        : '';
    throw new Error(
        `yt-dlp failed for ${platform} after ${attempts.length} attempts${hint}: ${String(lastErr?.message || lastErr).slice(0, 300)}`,
        { cause: lastErr },
    );
}
|
|
119
198
|
async function whisperTranscribe(audioPath) {
|
|
120
199
|
if (!OPENAI_KEY)
|
|
121
200
|
throw new Error('OPENAI_API_KEY not set');
|
|
@@ -450,7 +529,7 @@ export async function analyzeVideo(url, onProgress) {
|
|
|
450
529
|
// analyze of the same URL never reads a half-written file.
|
|
451
530
|
const dlTmp = path.join(CACHE_DIR, `${urlHash}.dl-${crypto.randomBytes(4).toString('hex')}.mp4`);
|
|
452
531
|
try {
|
|
453
|
-
await
|
|
532
|
+
await downloadSourceVideo(url, dlTmp);
|
|
454
533
|
await fs.rename(dlTmp, videoPath);
|
|
455
534
|
}
|
|
456
535
|
catch (e) {
|
|
@@ -462,8 +541,8 @@ export async function analyzeVideo(url, onProgress) {
|
|
|
462
541
|
// clone idea gets a real name instead of "Clone: www.youtube.com".
|
|
463
542
|
let source_title = '';
|
|
464
543
|
try {
|
|
465
|
-
const t = await run(YT_DLP, ['--skip-download', '--no-warnings', '--no-playlist', '--print', '%(title)s', url]);
|
|
466
|
-
source_title = (t.out
|
|
544
|
+
const t = await run(YT_DLP, ['--skip-download', '--no-warnings', '--no-playlist', '--user-agent', SRC_UA, ...cookieArgs(), '--print', '%(title)s', url]);
|
|
545
|
+
source_title = cleanSourceTitle(t.out);
|
|
467
546
|
}
|
|
468
547
|
catch { /* title is best-effort — analysis still proceeds without it */ }
|
|
469
548
|
progress('Đang tách audio...');
|