tuna-agent 0.1.155 → 0.1.157

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,6 +116,85 @@ function run(cmd, args, opts = {}) {
116
116
  p.on('close', (code) => code === 0 ? resolve({ out, err }) : reject(new Error(`${cmd} exit ${code}: ${err.slice(0, 500)}`)));
117
117
  });
118
118
  }
119
// Desktop-Chrome user agent handed to yt-dlp via `--user-agent`.
// TikTok/Douyin reject unknown user-agents, so present a mainstream browser.
const SRC_UA = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'AppleWebKit/537.36 (KHTML, like Gecko)',
    'Chrome/124.0.0.0',
    'Safari/537.36',
].join(' ');
121
// Builds the optional yt-dlp cookie flags for auth-gated sources (Facebook
// private/page videos, some Douyin), driven by two environment variables:
//   YT_DLP_COOKIES              → path to a Netscape cookies.txt
//   YT_DLP_COOKIES_FROM_BROWSER → a browser name yt-dlp can read cookies from
// The cookie file wins when both are set. Returns an array of CLI arguments
// (empty when neither variable is set) ready to spread into a yt-dlp argv.
function cookieArgs() {
    const {
        YT_DLP_COOKIES: cookieFile,
        YT_DLP_COOKIES_FROM_BROWSER: browserName,
    } = process.env;
    if (cookieFile) {
        return ['--cookies', cookieFile];
    }
    return browserName ? ['--cookies-from-browser', browserName] : [];
}
131
// Classifies a source URL by its hostname.
//
// Returns one of 'youtube' | 'tiktok' | 'douyin' | 'facebook' | 'other'.
// Anything that cannot be parsed as a URL is reported as 'other'.
//
// Every pattern is anchored on a label boundary (`(^|\.)`) so that only the
// domain itself or one of its subdomains matches — look-alike hosts such as
// `notfacebook.com` or `evil-douyin.com` must not be classified as the real
// platform.
function detectPlatform(url) {
    let host = '';
    try {
        host = new URL(url).hostname.toLowerCase();
    }
    catch { /* malformed → host stays '' and falls through to 'other' */ }
    if (/(^|\.)(youtube\.com|youtu\.be)$/.test(host)) {
        return 'youtube';
    }
    if (/(^|\.)tiktok\.com$/.test(host)) {
        return 'tiktok';
    }
    if (/(^|\.)(douyin\.com|iesdouyin\.com)$/.test(host)) {
        return 'douyin';
    }
    if (/(^|\.)(facebook\.com|fb\.watch|fb\.com)$/.test(host)) {
        return 'facebook';
    }
    return 'other';
}
147
// yt-dlp `%(title)s` is a clean title on YouTube but the full post caption
// on TikTok/Douyin/Facebook (often 200+ chars, prefixed with engagement
// stats). Strip the social-stats prefix, keep the first segment, and cap
// to a sane title length so the cloned idea doesn't get a paragraph title.
//
// raw:     text printed by yt-dlp; only its first line is used. Falsy input
//          yields ''.
// returns: the cleaned title — at most 90 UTF-16 units, plus a trailing '…'
//          when it had to be truncated.
function cleanSourceTitle(raw) {
    let title = (raw ? String(raw) : '').trim().split('\n')[0].trim();
    // Drop leading "1K views · 51 reactions | " / "230 likes, 12 comments - " noise.
    title = title.replace(/^\s*(?:[\d.,]+\s*[KMB]?\s*(?:views?|likes?|reactions?|comments?|shares?|followers?)\b\s*[·,|–\-:]*\s*)+/i, '').trim();
    // FB/TikTok pack "title | extra | hashtags" — keep the first real segment,
    // unless it is too short to stand as a title on its own.
    const firstSegment = title.split(/\s*[|·]\s*/)[0].trim();
    if (firstSegment.length >= 8) {
        title = firstSegment;
    }
    if (title.length > 90) {
        title = title.slice(0, 90)
            // Remove the word the cut landed in so the title ends on a whole word.
            .replace(/\s+\S*$/, '')
            // Captions without whitespace (e.g. CJK + emoji) keep the raw cut,
            // which may have split a surrogate pair — drop the dangling half so
            // the result is well-formed UTF-16.
            .replace(/[\uD800-\uDBFF]$/, '')
            .trim() + '…';
    }
    return title;
}
163
// Download a source video across YouTube / TikTok / Douyin / Facebook.
// yt-dlp supports all of them, but a single rigid `-f` that works for
// YouTube fails on the others, so try a tolerant 720p-capped format then
// fall back to a looser selector. UA + optional cookies harden the
// non-YouTube extractors (FB private/page + Douyin need cookies).
//
// url:     source video URL (caller-supplied).
// dest:    output path handed to yt-dlp's `-o`; expected to end in .mp4.
// returns: a promise that resolves (with no value) once one attempt succeeds.
// throws:  Error naming the detected platform when every attempt fails; the
//          last yt-dlp failure is attached as `cause`.
async function downloadSourceVideo(url, dest) {
    const platform = detectPlatform(url);
    const common = [
        '--no-playlist', '--no-warnings', '--retries', '3', '--fragment-retries', '3',
        // dest ends in .mp4 — force merged/odd containers to mp4 so the file
        // lands EXACTLY at `dest` (else fs.rename → ENOENT, e.g. YouTube shorts
        // where bv*+ba merges to .mkv/.webm).
        '--merge-output-format', 'mp4', '--remux-video', 'mp4',
        '--user-agent', SRC_UA, ...cookieArgs(), '-o', dest,
        // `--` ends option parsing: the URL is caller-supplied, and without the
        // separator a value starting with `-` would be read as a yt-dlp option.
        '--', url,
    ];
    // Tried in order; the first selector that downloads successfully wins.
    const formatSelectors = [
        'bv*[height<=720]+ba/b[height<=720]/best',
        'best/mp4', // looser fallback (TikTok/Douyin/FB quirks)
    ];
    let lastErr;
    for (const [i, format] of formatSelectors.entries()) {
        try {
            await run(YT_DLP, ['-f', format, ...common]);
            return;
        }
        catch (e) {
            lastErr = e;
            console.warn(`[analyze_video] yt-dlp attempt ${i + 1}/${formatSelectors.length} failed (${platform}): ${String(e?.message || e).slice(0, 220)}`);
        }
    }
    const hint = (platform === 'facebook' || platform === 'douyin')
        ? ' — FB private/page & some Douyin need cookies (set YT_DLP_COOKIES)'
        : '';
    throw new Error(`yt-dlp failed for ${platform} after ${formatSelectors.length} attempts${hint}: ${String(lastErr?.message || lastErr).slice(0, 300)}`, { cause: lastErr });
}
119
198
  async function whisperTranscribe(audioPath) {
120
199
  if (!OPENAI_KEY)
121
200
  throw new Error('OPENAI_API_KEY not set');
@@ -450,7 +529,7 @@ export async function analyzeVideo(url, onProgress) {
450
529
  // analyze of the same URL never reads a half-written file.
451
530
  const dlTmp = path.join(CACHE_DIR, `${urlHash}.dl-${crypto.randomBytes(4).toString('hex')}.mp4`);
452
531
  try {
453
- await run(YT_DLP, ['-f', 'best[height<=720]/best', '-o', dlTmp, '--no-playlist', '--quiet', url]);
532
+ await downloadSourceVideo(url, dlTmp);
454
533
  await fs.rename(dlTmp, videoPath);
455
534
  }
456
535
  catch (e) {
@@ -462,8 +541,8 @@ export async function analyzeVideo(url, onProgress) {
462
541
  // clone idea gets a real name instead of "Clone: www.youtube.com".
463
542
  let source_title = '';
464
543
  try {
465
- const t = await run(YT_DLP, ['--skip-download', '--no-warnings', '--no-playlist', '--print', '%(title)s', url]);
466
- source_title = (t.out || '').trim().split('\n')[0].slice(0, 200);
544
+ const t = await run(YT_DLP, ['--skip-download', '--no-warnings', '--no-playlist', '--user-agent', SRC_UA, ...cookieArgs(), '--print', '%(title)s', url]);
545
+ source_title = cleanSourceTitle(t.out);
467
546
  }
468
547
  catch { /* title is best-effort — analysis still proceeds without it */ }
469
548
  progress('Đang tách audio...');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tuna-agent",
3
- "version": "0.1.155",
3
+ "version": "0.1.157",
4
4
  "description": "Tuna Agent - Run AI coding tasks on your machine",
5
5
  "bin": {
6
6
  "tuna-agent": "dist/cli/index.js"