tuna-agent 0.1.120 → 0.1.122
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -26,6 +26,6 @@ export interface AnalyzeVideoResult {
|
|
|
26
26
|
isError?: boolean;
|
|
27
27
|
error?: string;
|
|
28
28
|
}
|
|
29
|
-
export declare function analyzeVideo(url: string): Promise<AnalyzeVideoResult>;
|
|
30
|
-
/** Extension task handler — wraps analyzeVideo with WS response */
|
|
29
|
+
export declare function analyzeVideo(url: string, onProgress?: (step: string) => void): Promise<AnalyzeVideoResult>;
|
|
30
|
+
/** Extension task handler — wraps analyzeVideo with WS response + progress */
|
|
31
31
|
export declare function handleAnalyzeVideo(ws: AgentWebSocketClient, code: string, taskId: string, url: string): Promise<void>;
|
|
@@ -97,7 +97,8 @@ async function visionDescribe(frameB64, voiceoverText) {
|
|
|
97
97
|
const data = await res.json();
|
|
98
98
|
return data.choices?.[0]?.message?.content?.trim() || '';
|
|
99
99
|
}
|
|
100
|
-
export async function analyzeVideo(url) {
|
|
100
|
+
export async function analyzeVideo(url, onProgress) {
|
|
101
|
+
const progress = onProgress || (() => { });
|
|
101
102
|
const tmpDir = path.join(os.tmpdir(), 'tuna-analyze-' + crypto.randomBytes(6).toString('hex'));
|
|
102
103
|
await fs.mkdir(tmpDir, { recursive: true });
|
|
103
104
|
const videoPath = path.join(tmpDir, 'video.mp4');
|
|
@@ -105,8 +106,10 @@ export async function analyzeVideo(url) {
|
|
|
105
106
|
const framesDir = path.join(tmpDir, 'frames');
|
|
106
107
|
await fs.mkdir(framesDir, { recursive: true });
|
|
107
108
|
try {
|
|
109
|
+
progress('Đang tải video...');
|
|
108
110
|
console.log('[analyze_video] Downloading:', url);
|
|
109
111
|
await run(YT_DLP, ['-f', 'best[height<=720]/best', '-o', videoPath, '--no-playlist', '--quiet', url]);
|
|
112
|
+
progress('Đang tách audio...');
|
|
110
113
|
console.log('[analyze_video] Extracting audio');
|
|
111
114
|
await run(FFMPEG, ['-y', '-i', videoPath, '-vn', '-ar', '16000', '-ac', '1', '-b:a', '64k', audioPath, '-loglevel', 'error']);
|
|
112
115
|
console.log('[analyze_video] Probing duration');
|
|
@@ -119,35 +122,72 @@ export async function analyzeVideo(url) {
|
|
|
119
122
|
return 0;
|
|
120
123
|
}
|
|
121
124
|
})();
|
|
125
|
+
progress('Đang transcribe bằng Whisper...');
|
|
122
126
|
console.log('[analyze_video] Transcribing via Whisper');
|
|
123
127
|
const rawTranscript = await whisperTranscribe(audioPath);
|
|
128
|
+
progress('Đang sửa lỗi transcript...');
|
|
124
129
|
console.log('[analyze_video] AI correcting transcript');
|
|
125
130
|
const correctedText = await correctTranscript(rawTranscript.text, rawTranscript.language);
|
|
126
131
|
const transcript = { ...rawTranscript, text: correctedText };
|
|
127
132
|
const segments = transcript.segments || [];
|
|
128
|
-
|
|
133
|
+
const sceneSlots = [];
|
|
134
|
+
const SILENCE_THRESHOLD = 5; // seconds — gaps longer than this become their own scene
|
|
135
|
+
const MAX_SCENES = 30;
|
|
136
|
+
if (segments.length > 0) {
|
|
137
|
+
// Add silence scene before first segment if gap > threshold
|
|
138
|
+
if (segments[0].start > SILENCE_THRESHOLD) {
|
|
139
|
+
sceneSlots.push({ start: 0, end: segments[0].start, voiceover: '' });
|
|
140
|
+
}
|
|
141
|
+
for (let i = 0; i < segments.length; i++) {
|
|
142
|
+
const seg = segments[i];
|
|
143
|
+
sceneSlots.push({ start: seg.start, end: seg.end, voiceover: seg.text?.trim() || '' });
|
|
144
|
+
// Add silence scene between segments if gap > threshold
|
|
145
|
+
if (i < segments.length - 1) {
|
|
146
|
+
const gap = segments[i + 1].start - seg.end;
|
|
147
|
+
if (gap > SILENCE_THRESHOLD) {
|
|
148
|
+
sceneSlots.push({ start: seg.end, end: segments[i + 1].start, voiceover: '' });
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
// Add silence scene after last segment if gap > threshold
|
|
153
|
+
const lastEnd = segments[segments.length - 1].end;
|
|
154
|
+
if (durationSec - lastEnd > SILENCE_THRESHOLD) {
|
|
155
|
+
sceneSlots.push({ start: lastEnd, end: durationSec, voiceover: '' });
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
// No transcript — split into scenes every 8s (Veo3 clip length)
|
|
160
|
+
const interval = 8;
|
|
161
|
+
for (let t = 0; t < durationSec; t += interval) {
|
|
162
|
+
sceneSlots.push({ start: t, end: Math.min(t + interval, durationSec), voiceover: '' });
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
// Cap max scenes
|
|
166
|
+
const finalSlots = sceneSlots.slice(0, MAX_SCENES);
|
|
167
|
+
progress(`Đang cắt ${finalSlots.length} frames và phân tích...`);
|
|
168
|
+
console.log('[analyze_video] Building', finalSlots.length, 'scenes (segments:', segments.length, ', duration:', durationSec, 's)');
|
|
129
169
|
const scenes = [];
|
|
130
|
-
for (let i = 0; i <
|
|
131
|
-
const
|
|
132
|
-
const midpoint = (
|
|
133
|
-
const framePath = path.join(framesDir, `
|
|
170
|
+
for (let i = 0; i < finalSlots.length; i++) {
|
|
171
|
+
const slot = finalSlots[i];
|
|
172
|
+
const midpoint = (slot.start + slot.end) / 2;
|
|
173
|
+
const framePath = path.join(framesDir, `scene-${String(i).padStart(3, '0')}.jpg`);
|
|
134
174
|
try {
|
|
175
|
+
progress(`Đang phân tích scene ${i + 1}/${finalSlots.length}...`);
|
|
135
176
|
await run(FFMPEG, ['-y', '-ss', String(midpoint), '-i', videoPath, '-vframes', '1', '-vf', 'scale=640:-1', '-q:v', '5', framePath, '-loglevel', 'error']);
|
|
136
177
|
const buf = await fs.readFile(framePath);
|
|
137
|
-
const
|
|
138
|
-
const visual_description = await visionDescribe(buf.toString('base64'), voiceover);
|
|
178
|
+
const visual_description = await visionDescribe(buf.toString('base64'), slot.voiceover);
|
|
139
179
|
scenes.push({
|
|
140
180
|
scene_number: i + 1,
|
|
141
|
-
timestamp_start: Math.round(
|
|
142
|
-
timestamp_end: Math.round(
|
|
181
|
+
timestamp_start: Math.round(slot.start * 10) / 10,
|
|
182
|
+
timestamp_end: Math.round(slot.end * 10) / 10,
|
|
143
183
|
thumbnail_base64: buf.toString('base64'),
|
|
144
|
-
voiceover,
|
|
184
|
+
voiceover: slot.voiceover,
|
|
145
185
|
visual_description,
|
|
146
186
|
});
|
|
147
187
|
}
|
|
148
188
|
catch (err) {
|
|
149
189
|
const msg = err instanceof Error ? err.message : String(err);
|
|
150
|
-
console.warn('[analyze_video] Frame extract failed for
|
|
190
|
+
console.warn('[analyze_video] Frame extract failed for scene', i, msg);
|
|
151
191
|
}
|
|
152
192
|
}
|
|
153
193
|
return {
|
|
@@ -165,11 +205,13 @@ export async function analyzeVideo(url) {
|
|
|
165
205
|
catch { /* ignore */ }
|
|
166
206
|
}
|
|
167
207
|
}
|
|
168
|
-
/** Extension task handler — wraps analyzeVideo with WS response */
|
|
208
|
+
/** Extension task handler — wraps analyzeVideo with WS response + progress */
|
|
169
209
|
export async function handleAnalyzeVideo(ws, code, taskId, url) {
|
|
170
210
|
console.log(`[analyze_video] Starting for ${url}`);
|
|
171
211
|
try {
|
|
172
|
-
const result = await analyzeVideo(url)
|
|
212
|
+
const result = await analyzeVideo(url, (step) => {
|
|
213
|
+
ws.sendExtensionStream(code, taskId, step);
|
|
214
|
+
});
|
|
173
215
|
ws.sendExtensionDone(code, taskId, { ...result, isError: false });
|
|
174
216
|
}
|
|
175
217
|
catch (err) {
|