tuna-agent 0.1.136 → 0.1.138
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon/analyze-video-handler.js +129 -41
- package/package.json +1 -1
|
@@ -14,6 +14,49 @@ const OPENAI_KEY = process.env.OPENAI_API_KEY || '';
|
|
|
14
14
|
const YT_DLP = process.env.YT_DLP_BIN || '/home/gatoasang94/.local/bin/yt-dlp';
|
|
15
15
|
const FFMPEG = process.env.FFMPEG_BIN || '/usr/bin/ffmpeg';
|
|
16
16
|
const FFPROBE = process.env.FFPROBE_BIN || '/usr/bin/ffprobe';
|
|
17
|
+
// Downloaded source videos are cached by URL hash so re-analyze doesn't
|
|
18
|
+
// re-download (saves bandwidth + time on long clips). relabs01 shares disk
|
|
19
|
+
// with Demucs + the local media server, so the cache is bounded: drop files
|
|
20
|
+
// older than 7 days, then if the total still exceeds 15 GB evict oldest-first.
|
|
21
|
+
const CACHE_DIR = path.join(os.homedir(), '.tuna-analyze-cache');
|
|
22
|
+
const CACHE_MAX_AGE_MS = 7 * 24 * 3600 * 1000;
|
|
23
|
+
const CACHE_MAX_BYTES = 15 * 1024 * 1024 * 1024;
|
|
24
|
+
/**
 * Best-effort pruning of the on-disk video cache in CACHE_DIR.
 *
 * Pass 1 removes every regular file whose mtime is older than
 * CACHE_MAX_AGE_MS. Pass 2 runs only when the surviving files still add up
 * to more than CACHE_MAX_BYTES, and removes them oldest-mtime-first until the
 * running total is back under the limit. Directory entries that are not
 * regular files are left alone.
 *
 * Never throws: failures are logged and otherwise ignored so that cache
 * housekeeping cannot block an analysis run.
 *
 * NOTE(review): both passes trust mtime. If the downloader stamps files with
 * the remote Last-Modified time, a fresh download could look old and be
 * evicted early — confirm how yt-dlp is invoked (e.g. --no-mtime).
 *
 * @returns {Promise<void>} resolves once pruning has finished or been abandoned.
 */
async function pruneVideoCache() {
    // A file vanishing mid-scan just means another run deleted it first.
    // Anything else (EACCES, EIO, ...) is logged, because a prune that keeps
    // failing means the cache is no longer bounded.
    const warnUnlessGone = (what, err) => {
        if (err?.code === 'ENOENT')
            return;
        const detail = err instanceof Error ? err.message : String(err);
        console.warn('[analyze_video] Cache prune failed for', what, '-', detail);
    };
    try {
        await fs.mkdir(CACHE_DIR, { recursive: true });
        const names = await fs.readdir(CACHE_DIR);
        const now = Date.now();
        const live = [];
        // Pass 1: age-based eviction; collect the survivors for pass 2.
        for (const name of names) {
            const p = path.join(CACHE_DIR, name);
            try {
                const st = await fs.stat(p);
                if (!st.isFile())
                    continue;
                if (now - st.mtimeMs > CACHE_MAX_AGE_MS) {
                    await fs.rm(p, { force: true });
                    continue;
                }
                live.push({ p, size: st.size, mtime: st.mtimeMs });
            }
            catch (err) {
                warnUnlessGone(p, err);
            }
        }
        // Pass 2: size-based eviction, oldest first.
        let total = live.reduce((s, f) => s + f.size, 0);
        if (total > CACHE_MAX_BYTES) {
            live.sort((a, b) => a.mtime - b.mtime); // oldest first
            for (const f of live) {
                if (total <= CACHE_MAX_BYTES)
                    break;
                try {
                    await fs.rm(f.p, { force: true });
                    // Only count the bytes as freed when the delete succeeded.
                    total -= f.size;
                }
                catch (err) {
                    warnUnlessGone(f.p, err);
                }
            }
        }
    }
    catch (err) {
        // Cache pruning is best-effort; never block analysis.
        warnUnlessGone(CACHE_DIR, err);
    }
}
|
|
17
60
|
function run(cmd, args, opts = {}) {
|
|
18
61
|
return new Promise((resolve, reject) => {
|
|
19
62
|
const p = spawn(cmd, args, { stdio: ['ignore', 'pipe', 'pipe'], ...opts });
|
|
@@ -224,14 +267,41 @@ export async function analyzeVideo(url, onProgress) {
|
|
|
224
267
|
const progress = onProgress || (() => { });
|
|
225
268
|
const tmpDir = path.join(os.tmpdir(), 'tuna-analyze-' + crypto.randomBytes(6).toString('hex'));
|
|
226
269
|
await fs.mkdir(tmpDir, { recursive: true });
|
|
227
|
-
|
|
270
|
+
// Video lives in the persistent URL-keyed cache (NOT tmpDir) so re-analyze
|
|
271
|
+
// reuses it. Only audio/frames are per-run + cleaned up in `finally`.
|
|
272
|
+
const urlHash = crypto.createHash('sha1').update(url).digest('hex');
|
|
273
|
+
const videoPath = path.join(CACHE_DIR, `${urlHash}.mp4`);
|
|
228
274
|
const audioPath = path.join(tmpDir, 'audio.mp3');
|
|
229
275
|
const framesDir = path.join(tmpDir, 'frames');
|
|
230
276
|
await fs.mkdir(framesDir, { recursive: true });
|
|
231
277
|
try {
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
278
|
+
await pruneVideoCache();
|
|
279
|
+
const cached = await fs.stat(videoPath).then(st => st.isFile() && st.size > 0).catch(() => false);
|
|
280
|
+
if (cached) {
|
|
281
|
+
progress('Dùng video đã tải (cache)...');
|
|
282
|
+
console.log('[analyze_video] Cache HIT:', videoPath);
|
|
283
|
+
// Bump mtime so an actively re-analyzed video isn't evicted by age.
|
|
284
|
+
try {
|
|
285
|
+
const now = new Date();
|
|
286
|
+
await fs.utimes(videoPath, now, now);
|
|
287
|
+
}
|
|
288
|
+
catch { /* ignore */ }
|
|
289
|
+
}
|
|
290
|
+
else {
|
|
291
|
+
progress('Đang tải video...');
|
|
292
|
+
console.log('[analyze_video] Cache MISS, downloading:', url);
|
|
293
|
+
// Download to a temp name then atomically rename in, so a concurrent
|
|
294
|
+
// analyze of the same URL never reads a half-written file.
|
|
295
|
+
const dlTmp = path.join(CACHE_DIR, `${urlHash}.dl-${crypto.randomBytes(4).toString('hex')}.mp4`);
|
|
296
|
+
try {
|
|
297
|
+
await run(YT_DLP, ['-f', 'best[height<=720]/best', '-o', dlTmp, '--no-playlist', '--quiet', url]);
|
|
298
|
+
await fs.rename(dlTmp, videoPath);
|
|
299
|
+
}
|
|
300
|
+
catch (e) {
|
|
301
|
+
await fs.rm(dlTmp, { force: true }).catch(() => { });
|
|
302
|
+
throw e;
|
|
303
|
+
}
|
|
304
|
+
}
|
|
235
305
|
// Grab the original video title (metadata only, no extra download) so the
|
|
236
306
|
// clone idea gets a real name instead of "Clone: www.youtube.com".
|
|
237
307
|
let source_title = '';
|
|
@@ -269,56 +339,74 @@ export async function analyzeVideo(url, onProgress) {
|
|
|
269
339
|
// 90s monologue becomes ~11 scenes instead of one giant clip. A hard
|
|
270
340
|
// ceiling still bounds runaway vision cost on very long videos.
|
|
271
341
|
const TARGET_SCENE_SEC = 8;
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
//
|
|
276
|
-
//
|
|
277
|
-
//
|
|
278
|
-
const
|
|
279
|
-
|
|
280
|
-
if (span <= TARGET_SCENE_SEC * 1.5) {
|
|
281
|
-
sceneSlots.push({ start, end, voiceover });
|
|
282
|
-
return;
|
|
283
|
-
}
|
|
284
|
-
const n = Math.ceil(span / TARGET_SCENE_SEC);
|
|
285
|
-
const step = span / n;
|
|
286
|
-
for (let k = 0; k < n; k++) {
|
|
287
|
-
sceneSlots.push({
|
|
288
|
-
start: start + k * step,
|
|
289
|
-
end: k === n - 1 ? end : start + (k + 1) * step,
|
|
290
|
-
voiceover: k === 0 ? voiceover : '',
|
|
291
|
-
});
|
|
292
|
-
}
|
|
293
|
-
};
|
|
342
|
+
// Safety ceiling ONLY (≈80 min @ 8s). It must NOT be derived from
|
|
343
|
+
// ceil(duration/8): Whisper emits hundreds of 2-4s segments for a talky
|
|
344
|
+
// video, so a tighter cap + slice() silently dropped the back half of
|
|
345
|
+
// the video (13-min clip → 118 slots → only first 6:21 kept). The
|
|
346
|
+
// normalise pass below already collapses tiny segments into ~8s scenes,
|
|
347
|
+
// so the natural count ≈ ceil(duration/8) and this only guards runaway.
|
|
348
|
+
const HARD_CAP = 600;
|
|
349
|
+
const spans = [];
|
|
294
350
|
if (segments.length > 0) {
|
|
295
|
-
if (segments[0].start > SILENCE_THRESHOLD)
|
|
296
|
-
|
|
297
|
-
}
|
|
351
|
+
if (segments[0].start > SILENCE_THRESHOLD)
|
|
352
|
+
spans.push({ start: 0, end: segments[0].start, voiceover: '' });
|
|
298
353
|
for (let i = 0; i < segments.length; i++) {
|
|
299
354
|
const seg = segments[i];
|
|
300
|
-
|
|
355
|
+
spans.push({ start: seg.start, end: seg.end, voiceover: seg.text?.trim() || '' });
|
|
301
356
|
if (i < segments.length - 1) {
|
|
302
357
|
const gap = segments[i + 1].start - seg.end;
|
|
303
|
-
if (gap > SILENCE_THRESHOLD)
|
|
304
|
-
|
|
305
|
-
}
|
|
358
|
+
if (gap > SILENCE_THRESHOLD)
|
|
359
|
+
spans.push({ start: seg.end, end: segments[i + 1].start, voiceover: '' });
|
|
306
360
|
}
|
|
307
361
|
}
|
|
308
362
|
const lastEnd = segments[segments.length - 1].end;
|
|
309
|
-
if (durationSec - lastEnd > SILENCE_THRESHOLD)
|
|
310
|
-
|
|
311
|
-
}
|
|
363
|
+
if (durationSec - lastEnd > SILENCE_THRESHOLD)
|
|
364
|
+
spans.push({ start: lastEnd, end: durationSec, voiceover: '' });
|
|
312
365
|
}
|
|
313
366
|
else {
|
|
314
|
-
// No transcript — split into scenes every 8s (Veo3 clip length)
|
|
315
367
|
for (let t = 0; t < durationSec; t += TARGET_SCENE_SEC) {
|
|
316
|
-
|
|
368
|
+
spans.push({ start: t, end: Math.min(t + TARGET_SCENE_SEC, durationSec), voiceover: '' });
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
// 2) Normalise every span to ~TARGET-second scenes covering the FULL
|
|
372
|
+
// timeline:
|
|
373
|
+
// - long span (> 1.5×TARGET): split into ceil(span/TARGET) equal slots
|
|
374
|
+
// - short spans: greedily MERGE consecutive ones until ≈TARGET so a
|
|
375
|
+
// talky video becomes ~ceil(duration/8) Veo3-length scenes instead
|
|
376
|
+
// of hundreds of 2s fragments — crucially WITHOUT dropping the tail.
|
|
377
|
+
for (let i = 0; i < spans.length;) {
|
|
378
|
+
const s = spans[i];
|
|
379
|
+
const span = s.end - s.start;
|
|
380
|
+
if (span > TARGET_SCENE_SEC * 1.5) {
|
|
381
|
+
const n = Math.ceil(span / TARGET_SCENE_SEC);
|
|
382
|
+
const step = span / n;
|
|
383
|
+
for (let k = 0; k < n; k++) {
|
|
384
|
+
sceneSlots.push({
|
|
385
|
+
start: s.start + k * step,
|
|
386
|
+
end: k === n - 1 ? s.end : s.start + (k + 1) * step,
|
|
387
|
+
voiceover: k === 0 ? s.voiceover : '',
|
|
388
|
+
});
|
|
389
|
+
}
|
|
390
|
+
i++;
|
|
391
|
+
}
|
|
392
|
+
else {
|
|
393
|
+
let end = s.end;
|
|
394
|
+
const vo = s.voiceover ? [s.voiceover] : [];
|
|
395
|
+
let j = i + 1;
|
|
396
|
+
while (j < spans.length &&
|
|
397
|
+
(end - s.start) < TARGET_SCENE_SEC &&
|
|
398
|
+
(spans[j].end - s.start) <= TARGET_SCENE_SEC * 1.5) {
|
|
399
|
+
end = spans[j].end;
|
|
400
|
+
if (spans[j].voiceover)
|
|
401
|
+
vo.push(spans[j].voiceover);
|
|
402
|
+
j++;
|
|
403
|
+
}
|
|
404
|
+
sceneSlots.push({ start: s.start, end, voiceover: vo.join(' ') });
|
|
405
|
+
i = j;
|
|
317
406
|
}
|
|
318
407
|
}
|
|
319
|
-
//
|
|
320
|
-
|
|
321
|
-
const finalSlots = sceneSlots.slice(0, MAX_SCENES);
|
|
408
|
+
// slice() now only ever trims pathological >80-min inputs.
|
|
409
|
+
const finalSlots = sceneSlots.slice(0, HARD_CAP);
|
|
322
410
|
progress(`Đang cắt ${finalSlots.length} frames và phân tích...`);
|
|
323
411
|
console.log('[analyze_video] Building', finalSlots.length, 'scenes (segments:', segments.length, ', duration:', durationSec, 's)');
|
|
324
412
|
// Step 1: Extract frames sequentially. Per scene we grab 3 chronological
|