tuna-agent 0.1.120 → 0.1.122

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,6 @@ export interface AnalyzeVideoResult {
26
26
  isError?: boolean;
27
27
  error?: string;
28
28
  }
29
- export declare function analyzeVideo(url: string): Promise<AnalyzeVideoResult>;
30
- /** Extension task handler — wraps analyzeVideo with WS response */
29
+ export declare function analyzeVideo(url: string, onProgress?: (step: string) => void): Promise<AnalyzeVideoResult>;
30
+ /** Extension task handler — wraps analyzeVideo with WS response + progress */
31
31
  export declare function handleAnalyzeVideo(ws: AgentWebSocketClient, code: string, taskId: string, url: string): Promise<void>;
@@ -97,7 +97,8 @@ async function visionDescribe(frameB64, voiceoverText) {
97
97
  const data = await res.json();
98
98
  return data.choices?.[0]?.message?.content?.trim() || '';
99
99
  }
100
- export async function analyzeVideo(url) {
100
+ export async function analyzeVideo(url, onProgress) {
101
+ const progress = onProgress || (() => { });
101
102
  const tmpDir = path.join(os.tmpdir(), 'tuna-analyze-' + crypto.randomBytes(6).toString('hex'));
102
103
  await fs.mkdir(tmpDir, { recursive: true });
103
104
  const videoPath = path.join(tmpDir, 'video.mp4');
@@ -105,8 +106,10 @@ export async function analyzeVideo(url) {
105
106
  const framesDir = path.join(tmpDir, 'frames');
106
107
  await fs.mkdir(framesDir, { recursive: true });
107
108
  try {
109
+ progress('Đang tải video...');
108
110
  console.log('[analyze_video] Downloading:', url);
109
111
  await run(YT_DLP, ['-f', 'best[height<=720]/best', '-o', videoPath, '--no-playlist', '--quiet', url]);
112
+ progress('Đang tách audio...');
110
113
  console.log('[analyze_video] Extracting audio');
111
114
  await run(FFMPEG, ['-y', '-i', videoPath, '-vn', '-ar', '16000', '-ac', '1', '-b:a', '64k', audioPath, '-loglevel', 'error']);
112
115
  console.log('[analyze_video] Probing duration');
@@ -119,35 +122,72 @@ export async function analyzeVideo(url) {
119
122
  return 0;
120
123
  }
121
124
  })();
125
+ progress('Đang transcribe bằng Whisper...');
122
126
  console.log('[analyze_video] Transcribing via Whisper');
123
127
  const rawTranscript = await whisperTranscribe(audioPath);
128
+ progress('Đang sửa lỗi transcript...');
124
129
  console.log('[analyze_video] AI correcting transcript');
125
130
  const correctedText = await correctTranscript(rawTranscript.text, rawTranscript.language);
126
131
  const transcript = { ...rawTranscript, text: correctedText };
127
132
  const segments = transcript.segments || [];
128
- console.log('[analyze_video] Extracting', segments.length, 'frames (1 per segment)');
133
+ const sceneSlots = [];
134
+ const SILENCE_THRESHOLD = 5; // seconds — gaps longer than this become their own scene
135
+ const MAX_SCENES = 30;
136
+ if (segments.length > 0) {
137
+ // Add silence scene before first segment if gap > threshold
138
+ if (segments[0].start > SILENCE_THRESHOLD) {
139
+ sceneSlots.push({ start: 0, end: segments[0].start, voiceover: '' });
140
+ }
141
+ for (let i = 0; i < segments.length; i++) {
142
+ const seg = segments[i];
143
+ sceneSlots.push({ start: seg.start, end: seg.end, voiceover: seg.text?.trim() || '' });
144
+ // Add silence scene between segments if gap > threshold
145
+ if (i < segments.length - 1) {
146
+ const gap = segments[i + 1].start - seg.end;
147
+ if (gap > SILENCE_THRESHOLD) {
148
+ sceneSlots.push({ start: seg.end, end: segments[i + 1].start, voiceover: '' });
149
+ }
150
+ }
151
+ }
152
+ // Add silence scene after last segment if gap > threshold
153
+ const lastEnd = segments[segments.length - 1].end;
154
+ if (durationSec - lastEnd > SILENCE_THRESHOLD) {
155
+ sceneSlots.push({ start: lastEnd, end: durationSec, voiceover: '' });
156
+ }
157
+ }
158
+ else {
159
+ // No transcript — split into scenes every 8s (Veo3 clip length)
160
+ const interval = 8;
161
+ for (let t = 0; t < durationSec; t += interval) {
162
+ sceneSlots.push({ start: t, end: Math.min(t + interval, durationSec), voiceover: '' });
163
+ }
164
+ }
165
+ // Cap max scenes
166
+ const finalSlots = sceneSlots.slice(0, MAX_SCENES);
167
+ progress(`Đang cắt ${finalSlots.length} frames và phân tích...`);
168
+ console.log('[analyze_video] Building', finalSlots.length, 'scenes (segments:', segments.length, ', duration:', durationSec, 's)');
129
169
  const scenes = [];
130
- for (let i = 0; i < segments.length; i++) {
131
- const seg = segments[i];
132
- const midpoint = (seg.start + seg.end) / 2;
133
- const framePath = path.join(framesDir, `seg-${String(i).padStart(3, '0')}.jpg`);
170
+ for (let i = 0; i < finalSlots.length; i++) {
171
+ const slot = finalSlots[i];
172
+ const midpoint = (slot.start + slot.end) / 2;
173
+ const framePath = path.join(framesDir, `scene-${String(i).padStart(3, '0')}.jpg`);
134
174
  try {
175
+ progress(`Đang phân tích scene ${i + 1}/${finalSlots.length}...`);
135
176
  await run(FFMPEG, ['-y', '-ss', String(midpoint), '-i', videoPath, '-vframes', '1', '-vf', 'scale=640:-1', '-q:v', '5', framePath, '-loglevel', 'error']);
136
177
  const buf = await fs.readFile(framePath);
137
- const voiceover = seg.text?.trim() || '';
138
- const visual_description = await visionDescribe(buf.toString('base64'), voiceover);
178
+ const visual_description = await visionDescribe(buf.toString('base64'), slot.voiceover);
139
179
  scenes.push({
140
180
  scene_number: i + 1,
141
- timestamp_start: Math.round(seg.start * 10) / 10,
142
- timestamp_end: Math.round(seg.end * 10) / 10,
181
+ timestamp_start: Math.round(slot.start * 10) / 10,
182
+ timestamp_end: Math.round(slot.end * 10) / 10,
143
183
  thumbnail_base64: buf.toString('base64'),
144
- voiceover,
184
+ voiceover: slot.voiceover,
145
185
  visual_description,
146
186
  });
147
187
  }
148
188
  catch (err) {
149
189
  const msg = err instanceof Error ? err.message : String(err);
150
- console.warn('[analyze_video] Frame extract failed for segment', i, msg);
190
+ console.warn('[analyze_video] Frame extract failed for scene', i, msg);
151
191
  }
152
192
  }
153
193
  return {
@@ -165,11 +205,13 @@ export async function analyzeVideo(url) {
165
205
  catch { /* ignore */ }
166
206
  }
167
207
  }
168
- /** Extension task handler — wraps analyzeVideo with WS response */
208
+ /** Extension task handler — wraps analyzeVideo with WS response + progress */
169
209
  export async function handleAnalyzeVideo(ws, code, taskId, url) {
170
210
  console.log(`[analyze_video] Starting for ${url}`);
171
211
  try {
172
- const result = await analyzeVideo(url);
212
+ const result = await analyzeVideo(url, (step) => {
213
+ ws.sendExtensionStream(code, taskId, step);
214
+ });
173
215
  ws.sendExtensionDone(code, taskId, { ...result, isError: false });
174
216
  }
175
217
  catch (err) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tuna-agent",
3
- "version": "0.1.120",
3
+ "version": "0.1.122",
4
4
  "description": "Tuna Agent - Run AI coding tasks on your machine",
5
5
  "bin": {
6
6
  "tuna-agent": "dist/cli/index.js"