@lightcone-ai/daemon 0.15.71 → 0.15.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp-servers/publisher/adapters/kuaishou.js +2 -2
- package/package.json +1 -1
- package/src/_vendor/video/composer-v2/index.js +47 -6
- package/src/_vendor/video/recorder/index.js +120 -55
- package/src/_vendor/video/recorder/plan-executor.js +147 -42
- package/src/chat-bridge.js +4 -4
- package/src/tools/plan-video-segments.js +43 -10
|
@@ -56,7 +56,7 @@ export class KuaishouAdapter {
|
|
|
56
56
|
await this._clickByText('放弃');
|
|
57
57
|
await sleep(500);
|
|
58
58
|
try { await this._cdp.send('Runtime.evaluate', { expression: 'window.scrollTo(0, 300)', returnByValue: false }); } catch {}
|
|
59
|
-
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]',
|
|
59
|
+
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 120000);
|
|
60
60
|
|
|
61
61
|
const { loggedIn } = await this.checkLoginStatus();
|
|
62
62
|
if (!loggedIn) throw new Error('LOGIN_EXPIRED: 快手登录已过期,请重新扫码连接');
|
|
@@ -97,7 +97,7 @@ export class KuaishouAdapter {
|
|
|
97
97
|
|
|
98
98
|
// Scroll once to trigger any lazy-rendered upload widgets, then wait
|
|
99
99
|
try { await this._cdp.send('Runtime.evaluate', { expression: 'window.scrollTo(0, 300)', returnByValue: false }); } catch {}
|
|
100
|
-
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]',
|
|
100
|
+
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 120000);
|
|
101
101
|
|
|
102
102
|
const { loggedIn } = await this.checkLoginStatus();
|
|
103
103
|
if (!loggedIn) throw new Error('LOGIN_EXPIRED: 快手登录已过期,请重新扫码连接');
|
package/package.json
CHANGED
|
@@ -9,7 +9,12 @@ const DEFAULT_WIDTH = 1080;
|
|
|
9
9
|
const DEFAULT_HEIGHT = 1920;
|
|
10
10
|
const DEFAULT_FPS = 30;
|
|
11
11
|
const TRANSITION_DURATION = 0.5;
|
|
12
|
-
|
|
12
|
+
// ASS `Fontname` is a single family name, not a CSS-style fallback list: a
// comma here shifts every subsequent field in the `Style:` line, corrupting the
// whole style so libass renders nothing (burned-in subtitles look missing).
// Use one installed family; libass + fontconfig handle glyph fallback. Override
// via the SUBTITLE_FONT env var if the deployment ships a different CJK font.
// If the override is a comma-separated list, the first NON-EMPTY family wins
// (so " , Foo" yields "Foo" rather than silently reverting to the default).
const SUBTITLE_FONT = (process.env.SUBTITLE_FONT ?? '')
  .split(',')
  .map((family) => family.trim())
  .find(Boolean) ?? 'Noto Sans CJK SC';
|
|
13
18
|
const SUBTITLE_FONT_SIZE = 72;
|
|
14
19
|
const SUBTITLE_MARGIN_V = 120;
|
|
15
20
|
|
|
@@ -23,6 +28,28 @@ function msToAssTimestamp(ms) {
|
|
|
23
28
|
return `${hr}:${String(min).padStart(2, '0')}:${String(sec).padStart(2, '0')}.${String(cs).padStart(2, '0')}`;
|
|
24
29
|
}
|
|
25
30
|
|
|
31
|
+
/**
 * Split a subtitle block into display-sized sentence units.
 *
 * Breaks after CJK and ASCII sentence punctuation and after newlines, collapses
 * whitespace runs inside each unit, and merges very short fragments into a
 * neighbour so one- or two-character lines never flash on screen.
 *
 * @param {unknown} text - Subtitle text; null/undefined are treated as empty.
 * @returns {string[]} Sentence units in reading order ([] for blank input).
 */
function splitSubtitleSentences(text) {
  // Fragments shorter than this many code points are merged into a neighbour.
  const MIN_FRAGMENT_CHARS = 6;

  const raw = String(text ?? '').trim();
  if (!raw) return [];

  // Full-width punctuation is written as escapes so it cannot be silently
  // folded into its ASCII look-alike: U+3002 。 U+FF01 ！ U+FF1F ？ U+FF1B ；
  const pieces = raw
    .split(/(?<=[\u3002\uFF01\uFF1F\uFF1B!?;\n])/u)
    .map((piece) => piece.replace(/\s+/g, ' ').trim())
    .filter(Boolean);
  // A non-blank `raw` always yields at least one piece; returning it (instead
  // of `raw`) keeps whitespace normalisation consistent with the split path.
  if (pieces.length <= 1) return pieces;

  const codePointLength = (value) => Array.from(value).length;
  // Pieces were trimmed, so re-insert a space when both sides of the seam are
  // printable ASCII ("Hi!" + "There" -> "Hi! There"); CJK seams take no space.
  const joinPieces = (left, right) => (
    /[\x21-\x7E]$/.test(left) && /^[\x21-\x7E]/.test(right)
      ? `${left} ${right}`
      : `${left}${right}`
  );

  const merged = [];
  for (const piece of pieces) {
    const lastIndex = merged.length - 1;
    if (lastIndex >= 0 && codePointLength(merged[lastIndex]) < MIN_FRAGMENT_CHARS) {
      // Previous unit is too short to stand alone: merge it forward.
      merged[lastIndex] = joinPieces(merged[lastIndex], piece);
    } else {
      merged.push(piece);
    }
  }

  // A short trailing fragment has nothing after it, so fold it backward.
  if (merged.length > 1 && codePointLength(merged[merged.length - 1]) < MIN_FRAGMENT_CHARS) {
    const tail = merged.pop();
    merged[merged.length - 1] = joinPieces(merged[merged.length - 1], tail);
  }
  return merged;
}
|
|
52
|
+
|
|
26
53
|
function wrapSubtitleText(text, maxChars = 14) {
|
|
27
54
|
const chars = Array.from(String(text ?? ''));
|
|
28
55
|
if (chars.length <= maxChars) return chars.join('');
|
|
@@ -311,15 +338,29 @@ export async function composeVideoV2({
|
|
|
311
338
|
}
|
|
312
339
|
}
|
|
313
340
|
|
|
314
|
-
// Build subtitle entries with cumulative timeline timestamps
|
|
341
|
+
// Build subtitle entries with cumulative timeline timestamps. When a clip's
|
|
342
|
+
// subtitle text spans several sentences, split it into one event per sentence
|
|
343
|
+
// and spread them across the clip in proportion to their length, so a long
|
|
344
|
+
// beat reads as sequential lines roughly tracking the narration instead of one
|
|
345
|
+
// static wall of text.
|
|
315
346
|
let cursorMs = 0;
|
|
316
347
|
const subtitleEntries = [];
|
|
317
348
|
for (const clip of readyClips) {
|
|
318
349
|
if (clip.subtitleText) {
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
350
|
+
const clipMs = Math.round(clip.duration * 1000);
|
|
351
|
+
const sentences = splitSubtitleSentences(clip.subtitleText);
|
|
352
|
+
const totalLen = sentences.reduce((sum, s) => sum + Array.from(s).length, 0) || 1;
|
|
353
|
+
let offsetMs = 0;
|
|
354
|
+
sentences.forEach((sentence, idx) => {
|
|
355
|
+
const share = Array.from(sentence).length / totalLen;
|
|
356
|
+
const isLast = idx === sentences.length - 1;
|
|
357
|
+
const spanMs = isLast ? clipMs - offsetMs : Math.max(1, Math.round(clipMs * share));
|
|
358
|
+
subtitleEntries.push({
|
|
359
|
+
text: sentence,
|
|
360
|
+
start_ms: cursorMs + offsetMs,
|
|
361
|
+
end_ms: cursorMs + offsetMs + spanMs,
|
|
362
|
+
});
|
|
363
|
+
offsetMs += spanMs;
|
|
323
364
|
});
|
|
324
365
|
}
|
|
325
366
|
cursorMs += Math.round(clip.duration * 1000);
|
|
@@ -1,17 +1,12 @@
|
|
|
1
1
|
import { spawn } from 'node:child_process';
|
|
2
2
|
import { mkdirSync } from 'node:fs';
|
|
3
|
-
import { stat, writeFile } from 'node:fs/promises';
|
|
3
|
+
import { mkdtemp, rm, stat, writeFile } from 'node:fs/promises';
|
|
4
|
+
import os from 'node:os';
|
|
4
5
|
import path from 'node:path';
|
|
5
6
|
|
|
6
7
|
import { launchChromiumMobile, openPageAndSettle } from './chromium-driver.js';
|
|
7
8
|
import { defaultDisplayPool } from './display-pool.js';
|
|
8
|
-
import {
|
|
9
|
-
createUnexpectedExitWatcher,
|
|
10
|
-
startFfmpegCapture,
|
|
11
|
-
stopFfmpegCapture,
|
|
12
|
-
waitForProcessExit,
|
|
13
|
-
} from './ffmpeg-runner.js';
|
|
14
|
-
import { estimatePlanDurationMs } from './plan-estimator.js';
|
|
9
|
+
import { createUnexpectedExitWatcher, waitForProcessExit } from './ffmpeg-runner.js';
|
|
15
10
|
import { executePlanPhases, normalizePlanPhases } from './plan-executor.js';
|
|
16
11
|
|
|
17
12
|
const DEFAULT_VIEWPORT = Object.freeze({ width: 1080, height: 1920 });
|
|
@@ -177,6 +172,49 @@ function scalePhaseY(phase, zoom) {
|
|
|
177
172
|
};
|
|
178
173
|
}
|
|
179
174
|
|
|
175
|
+
/**
 * Re-encode the page recording (webm, page content only, no browser chrome)
 * into the mp4 the rest of the pipeline expects, dropping the head segment that
 * covers page load + settle so the clip starts at the first plan phase.
 *
 * @param {object} [options]
 * @param {string} options.webmPath - Source recording.
 * @param {string} options.outputPath - Destination mp4 (overwritten, `-y`).
 * @param {number} [options.startMs=0] - Milliseconds to trim from the head;
 *   non-numeric or negative values are treated as 0.
 * @param {number} [options.fps=DEFAULT_FPS] - Output frame rate; `-r` is only
 *   passed when this is a finite positive number.
 * @param {string} [options.ffmpegBin='ffmpeg'] - ffmpeg executable to spawn.
 * @returns {Promise<void>} Resolves when ffmpeg exits with code 0.
 * @throws {Error} `code === 'FFMPEG_SPAWN_FAILED'` if the process cannot start;
 *   `code === 'FFMPEG_TRANSCODE_FAILED'` on a non-zero exit (the message
 *   carries the tail of ffmpeg's stderr).
 */
async function transcodeWebmToMp4({
  webmPath,
  outputPath,
  startMs = 0,
  fps = DEFAULT_FPS,
  ffmpegBin = 'ffmpeg',
} = {}) {
  // Only the tail of stderr is reported, so only the tail is retained.
  const STDERR_TAIL_CHARS = 2000;

  const headSec = Math.max(0, Number(startMs) || 0) / 1000;
  const frameRate = Number(fps);
  const args = [
    '-y',
    // `-ss` before `-i` seeks on the input, i.e. trims the head of the source.
    ...(headSec > 0 ? ['-ss', headSec.toFixed(3)] : []),
    '-i', webmPath,
    '-an',
    '-c:v', 'libx264',
    '-preset', 'veryfast',
    '-pix_fmt', 'yuv420p',
    ...(Number.isFinite(frameRate) && frameRate > 0 ? ['-r', String(frameRate)] : []),
    '-movflags', '+faststart',
    outputPath,
  ];

  await new Promise((resolve, reject) => {
    // A failed spawn can emit both 'error' and 'close'; settle exactly once.
    let settled = false;
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      finish(value);
    };

    // stdout is never read, so it is ignored rather than piped: an undrained
    // pipe could otherwise fill up and stall the child.
    const proc = spawn(ffmpegBin, args, { stdio: ['ignore', 'ignore', 'pipe'] });

    let stderrTail = '';
    proc.stderr?.setEncoding('utf8');
    proc.stderr?.on('data', (chunk) => {
      stderrTail = `${stderrTail}${chunk}`.slice(-STDERR_TAIL_CHARS);
    });

    proc.once('error', (err) => {
      const wrapped = new Error(`ffmpeg_spawn_failed:${err.message}`, { cause: err });
      wrapped.code = 'FFMPEG_SPAWN_FAILED';
      settle(reject, wrapped);
    });

    proc.once('close', (code, signal) => {
      if (code === 0) {
        settle(resolve);
        return;
      }
      const wrapped = new Error(`ffmpeg_transcode_failed:code=${code}: ${stderrTail}`);
      wrapped.code = 'FFMPEG_TRANSCODE_FAILED';
      // `code` is null when the child was killed by a signal; keep the signal.
      wrapped.signal = signal ?? null;
      settle(reject, wrapped);
    });
  });
}
|
|
217
|
+
|
|
180
218
|
export async function recordUrlNarration({
|
|
181
219
|
plan,
|
|
182
220
|
output_path,
|
|
@@ -189,11 +227,14 @@ export async function recordUrlNarration({
|
|
|
189
227
|
settle_ms = 4000,
|
|
190
228
|
page_zoom = 1.1,
|
|
191
229
|
displayPool = defaultDisplayPool,
|
|
192
|
-
ffmpegDurationBufferSec = 8,
|
|
193
230
|
startupProbeMs = 1200,
|
|
194
|
-
ffmpegStopTimeoutMs = 10000,
|
|
195
231
|
xvfbStopTimeoutMs = 5000,
|
|
196
232
|
postPlanTailMs = 600,
|
|
233
|
+
recordingDir = null,
|
|
234
|
+
launchChromiumFn = launchChromiumMobile,
|
|
235
|
+
openPageFn = openPageAndSettle,
|
|
236
|
+
transcodeFn = transcodeWebmToMp4,
|
|
237
|
+
nowMs = () => Date.now(),
|
|
197
238
|
} = {}) {
|
|
198
239
|
const zoom = Number.isFinite(Number(page_zoom)) && Number(page_zoom) > 0 ? Number(page_zoom) : 1.1;
|
|
199
240
|
const rawPhases = normalizePlanPhases(plan);
|
|
@@ -212,14 +253,14 @@ export async function recordUrlNarration({
|
|
|
212
253
|
mkdirSync(path.dirname(resolvedOutputPath), { recursive: true });
|
|
213
254
|
mkdirSync(path.dirname(resolvedEventsPath), { recursive: true });
|
|
214
255
|
|
|
256
|
+
const ownTempDir = !recordingDir;
|
|
257
|
+
const recVideoDir = recordingDir || await mkdtemp(path.join(os.tmpdir(), 'lc-recvid-'));
|
|
258
|
+
|
|
215
259
|
let displayLease;
|
|
216
260
|
let xvfb;
|
|
217
|
-
let ffmpeg;
|
|
218
|
-
let browserSession;
|
|
219
261
|
let xvfbWatcher;
|
|
220
|
-
let
|
|
262
|
+
let browserSession = null;
|
|
221
263
|
let primaryError = null;
|
|
222
|
-
|
|
223
264
|
const cleanupErrors = [];
|
|
224
265
|
|
|
225
266
|
try {
|
|
@@ -234,11 +275,26 @@ export async function recordUrlNarration({
|
|
|
234
275
|
});
|
|
235
276
|
xvfbWatcher = createUnexpectedExitWatcher(xvfb.child, 'xvfb');
|
|
236
277
|
|
|
237
|
-
|
|
278
|
+
// The page recording captures the page viewport only (no browser chrome),
|
|
279
|
+
// regardless of the on-screen window. recordVideo starts when the page is
|
|
280
|
+
// created, so the webm includes goto + settle; we measure that head and trim
|
|
281
|
+
// it off in transcodeFn.
|
|
282
|
+
const recordStartedAt = nowMs();
|
|
283
|
+
browserSession = await launchChromiumFn({
|
|
238
284
|
display,
|
|
239
285
|
viewport: normalizedViewport,
|
|
286
|
+
contextOptions: {
|
|
287
|
+
recordVideo: {
|
|
288
|
+
dir: recVideoDir,
|
|
289
|
+
size: { width: normalizedViewport.width, height: normalizedViewport.height },
|
|
290
|
+
},
|
|
291
|
+
},
|
|
240
292
|
});
|
|
241
|
-
|
|
293
|
+
const videoHandle = typeof browserSession.page.video === 'function'
|
|
294
|
+
? browserSession.page.video()
|
|
295
|
+
: null;
|
|
296
|
+
|
|
297
|
+
await openPageFn(browserSession.page, {
|
|
242
298
|
url: resolvedUrl,
|
|
243
299
|
settleMs: settle_ms,
|
|
244
300
|
});
|
|
@@ -250,42 +306,53 @@ export async function recordUrlNarration({
|
|
|
250
306
|
await browserSession.page.waitForTimeout(300);
|
|
251
307
|
}
|
|
252
308
|
|
|
253
|
-
const estimatedDurationMs = estimatePlanDurationMs(executablePlan);
|
|
254
|
-
const estimatedDurationSec = Math.max(
|
|
255
|
-
5,
|
|
256
|
-
Math.ceil(estimatedDurationMs / 1000) + Math.max(0, Number(ffmpegDurationBufferSec) || 0)
|
|
257
|
-
);
|
|
258
|
-
|
|
259
|
-
ffmpeg = await startFfmpegCapture({
|
|
260
|
-
display,
|
|
261
|
-
outputPath: resolvedOutputPath,
|
|
262
|
-
width: normalizedViewport.width,
|
|
263
|
-
height: normalizedViewport.height,
|
|
264
|
-
fps: normalizedFps,
|
|
265
|
-
durationSec: estimatedDurationSec,
|
|
266
|
-
startupProbeMs,
|
|
267
|
-
});
|
|
268
|
-
ffmpegWatcher = createUnexpectedExitWatcher(ffmpeg.child, 'ffmpeg');
|
|
269
|
-
|
|
270
309
|
await scrollToTop(browserSession.page);
|
|
271
310
|
await browserSession.page.waitForTimeout(350);
|
|
272
311
|
|
|
312
|
+
const headTrimMs = Math.max(0, nowMs() - recordStartedAt);
|
|
313
|
+
|
|
273
314
|
const eventsLog = await Promise.race([
|
|
274
315
|
executePlanPhases(browserSession.page, executablePlan),
|
|
275
316
|
xvfbWatcher.promise,
|
|
276
|
-
ffmpegWatcher.promise,
|
|
277
317
|
]);
|
|
278
318
|
|
|
279
319
|
await browserSession.page.waitForTimeout(Math.max(0, Number(postPlanTailMs) || 0));
|
|
280
320
|
|
|
281
|
-
ffmpegWatcher.deactivate();
|
|
282
|
-
await stopFfmpegCapture(ffmpeg, {
|
|
283
|
-
timeoutMs: ffmpegStopTimeoutMs,
|
|
284
|
-
});
|
|
285
|
-
|
|
286
321
|
xvfbWatcher.deactivate();
|
|
287
322
|
|
|
288
|
-
|
|
323
|
+
// Flush the recording: video is written when the context closes.
|
|
324
|
+
let webmPath = null;
|
|
325
|
+
try {
|
|
326
|
+
await browserSession.context.close();
|
|
327
|
+
} catch (closeError) {
|
|
328
|
+
cleanupErrors.push(`context_close_failed:${closeError.message}`);
|
|
329
|
+
}
|
|
330
|
+
if (videoHandle) {
|
|
331
|
+
try {
|
|
332
|
+
webmPath = await videoHandle.path();
|
|
333
|
+
} catch (pathError) {
|
|
334
|
+
cleanupErrors.push(`video_path_failed:${pathError.message}`);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
try {
|
|
338
|
+
await browserSession.browser.close();
|
|
339
|
+
} catch (closeError) {
|
|
340
|
+
cleanupErrors.push(`browser_close_failed:${closeError.message}`);
|
|
341
|
+
}
|
|
342
|
+
browserSession = null;
|
|
343
|
+
|
|
344
|
+
if (!webmPath) {
|
|
345
|
+
const error = new Error('record_video_not_produced');
|
|
346
|
+
error.code = 'RECORD_VIDEO_NOT_PRODUCED';
|
|
347
|
+
throw error;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
await transcodeFn({
|
|
351
|
+
webmPath,
|
|
352
|
+
outputPath: resolvedOutputPath,
|
|
353
|
+
startMs: headTrimMs,
|
|
354
|
+
fps: normalizedFps,
|
|
355
|
+
});
|
|
289
356
|
|
|
290
357
|
const videoStat = await stat(resolvedOutputPath);
|
|
291
358
|
if (!videoStat.isFile() || videoStat.size <= 0) {
|
|
@@ -294,42 +361,36 @@ export async function recordUrlNarration({
|
|
|
294
361
|
throw error;
|
|
295
362
|
}
|
|
296
363
|
|
|
364
|
+
await writeFile(resolvedEventsPath, JSON.stringify(eventsLog, null, 2), 'utf8');
|
|
365
|
+
|
|
366
|
+
const lastTms = Array.isArray(eventsLog)
|
|
367
|
+
? eventsLog.reduce((max, ev) => Math.max(max, Number(ev?.t_ms) || 0), 0)
|
|
368
|
+
: 0;
|
|
369
|
+
|
|
297
370
|
return {
|
|
298
371
|
video_path: resolvedOutputPath,
|
|
299
372
|
events_path: resolvedEventsPath,
|
|
300
373
|
events_log: eventsLog,
|
|
374
|
+
duration_ms: lastTms > 0 ? lastTms : null,
|
|
301
375
|
display,
|
|
302
376
|
};
|
|
303
377
|
} catch (error) {
|
|
304
378
|
primaryError = error;
|
|
305
379
|
throw error;
|
|
306
380
|
} finally {
|
|
307
|
-
ffmpegWatcher?.deactivate();
|
|
308
381
|
xvfbWatcher?.deactivate();
|
|
309
382
|
|
|
310
383
|
if (browserSession) {
|
|
311
384
|
try {
|
|
312
|
-
await browserSession.close();
|
|
385
|
+
await browserSession.browser.close();
|
|
313
386
|
} catch (closeError) {
|
|
314
387
|
cleanupErrors.push(`browser_close_failed:${closeError.message}`);
|
|
315
388
|
}
|
|
316
389
|
}
|
|
317
390
|
|
|
318
|
-
if (ffmpeg) {
|
|
319
|
-
try {
|
|
320
|
-
await stopFfmpegCapture(ffmpeg, {
|
|
321
|
-
timeoutMs: ffmpegStopTimeoutMs,
|
|
322
|
-
});
|
|
323
|
-
} catch (stopError) {
|
|
324
|
-
cleanupErrors.push(`ffmpeg_stop_failed:${stopError.message}`);
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
|
|
328
391
|
if (xvfb) {
|
|
329
392
|
try {
|
|
330
|
-
await stopXvfb(xvfb, {
|
|
331
|
-
timeoutMs: xvfbStopTimeoutMs,
|
|
332
|
-
});
|
|
393
|
+
await stopXvfb(xvfb, { timeoutMs: xvfbStopTimeoutMs });
|
|
333
394
|
} catch (stopError) {
|
|
334
395
|
cleanupErrors.push(`xvfb_stop_failed:${stopError.message}`);
|
|
335
396
|
}
|
|
@@ -339,6 +400,10 @@ export async function recordUrlNarration({
|
|
|
339
400
|
displayLease.release();
|
|
340
401
|
}
|
|
341
402
|
|
|
403
|
+
if (ownTempDir) {
|
|
404
|
+
await rm(recVideoDir, { recursive: true, force: true }).catch(() => {});
|
|
405
|
+
}
|
|
406
|
+
|
|
342
407
|
if (cleanupErrors.length > 0) {
|
|
343
408
|
if (primaryError) {
|
|
344
409
|
primaryError.cleanupErrors = cleanupErrors;
|
|
@@ -21,6 +21,61 @@ function normalizeRange(value) {
|
|
|
21
21
|
return [low, high];
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
/**
 * The complete set of visual actions the recorder executes.
 *
 * There is deliberately no "scroll a bit" blind-scroll action: every scroll
 * phase must say where it lands.
 */
export const SUPPORTED_PHASE_ACTIONS = Object.freeze([
  // Stationary.
  'hold',
  // Scroll to an explicit target.
  'smooth_scroll', 'fast_scroll', 'linear_scroll_during', 'scroll_to_dwell',
  // Return / emphasis.
  'scroll_back', 'cursor_focus',
]);
|
|
35
|
+
|
|
36
|
+
// Alias -> canonical action lookup for the spellings plan authors commonly
// reach for, declared grouped by the canonical action they resolve to.
// scroll_down / scroll_up are intentionally NOT aliased: there is no blind
// scroll, so an unrecognised action raises phase_action_unsupported and the
// plan gets fixed rather than silently degraded.
const PHASE_ACTION_ALIASES = new Map(
  Object.entries({
    scroll_to_dwell: [
      'scroll_to', 'scrollto', 'scroll', 'scroll_to_region', 'scroll_to_y', 'dwell', 'focus_hold',
    ],
    linear_scroll_during: [
      'pan', 'narrated_pan', 'linear_scroll', 'scroll_during', 'scroll_while_narrating',
    ],
    scroll_back: ['return', 'return_anchor', 'back', 'scroll_to_top'],
    hold: ['wait', 'pause', 'stay'],
    cursor_focus: ['focus', 'highlight'],
  }).flatMap(([canonical, spellings]) => spellings.map((spelling) => [spelling, canonical])),
);
|
|
63
|
+
|
|
64
|
+
/**
 * Map a raw action spelling onto the name used for dispatch.
 *
 * The value is passed through normalizeText and lower-cased. A supported
 * action is returned as-is, a known alias is replaced by its canonical
 * action, and anything else is returned unchanged (lower-cased).
 *
 * @param {unknown} rawValue - Action name as written in the plan.
 * @returns {string} Resolved action name, or '' when the input is blank.
 */
function normalizeActionName(rawValue) {
  const key = normalizeText(rawValue).toLowerCase();
  if (key.length === 0) {
    return '';
  }
  if (SUPPORTED_PHASE_ACTIONS.includes(key)) {
    return key;
  }
  const canonical = PHASE_ACTION_ALIASES.get(key);
  return canonical || key;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Read a section's `visual_action` in object form.
 *
 * `visual_action` may be a string (the action name) or an object
 * ({type, target_y, ...}). A non-blank string becomes `{ type }`; a plain
 * object is returned as-is (same reference, not a copy). Anything else,
 * including arrays (which are `typeof 'object'` but carry no named fields),
 * yields an empty object.
 *
 * @param {object} [section] - Plan section or phase.
 * @returns {object} The visual-action object, or `{}` when absent/invalid.
 */
function visualActionObject(section = {}) {
  const raw = section?.visual_action;
  if (typeof raw === 'string') {
    const type = raw.trim();
    return type ? { type } : {};
  }
  if (raw !== null && typeof raw === 'object' && !Array.isArray(raw)) {
    return raw;
  }
  return {};
}
|
|
78
|
+
|
|
24
79
|
function inferActionFromCameraMotion(phase = {}) {
|
|
25
80
|
const motion = normalizeText(phase.camera_motion ?? phase.cameraMotion).toLowerCase();
|
|
26
81
|
if (motion === 'narrated_pan') return 'linear_scroll_during';
|
|
@@ -30,19 +85,32 @@ function inferActionFromCameraMotion(phase = {}) {
|
|
|
30
85
|
return '';
|
|
31
86
|
}
|
|
32
87
|
|
|
88
|
+
/**
 * Return the first candidate that is a usable number, rounded to an integer.
 *
 * Only finite numbers and non-blank numeric strings count. Blank strings,
 * booleans, arrays and other objects are skipped: `Number()` would coerce
 * '' / '  ' / false / [] to 0, which would turn an empty plan field into an
 * explicit coordinate of 0.
 *
 * @param {...unknown} values - Candidates in priority order.
 * @returns {number|null} The rounded first match, or null when none qualifies.
 */
function pickFirstNumber(...values) {
  for (const value of values) {
    if (typeof value === 'number') {
      if (Number.isFinite(value)) return Math.round(value);
      continue;
    }
    if (typeof value !== 'string') continue;

    const trimmed = value.trim();
    if (trimmed === '') continue;
    const parsed = Number(trimmed);
    if (Number.isFinite(parsed)) return Math.round(parsed);
  }
  return null;
}
|
|
96
|
+
|
|
33
97
|
function normalizeSectionAsPhase(section = {}, index = 0) {
|
|
34
|
-
const phaseId = normalizeText(section.id ?? section.phase_id) || `phase_${index + 1}`;
|
|
35
|
-
const visualAction = section
|
|
36
|
-
? section.visual_action
|
|
37
|
-
: {};
|
|
98
|
+
const phaseId = normalizeText(section.id ?? section.phase_id ?? section.name) || `phase_${index + 1}`;
|
|
99
|
+
const visualAction = visualActionObject(section);
|
|
38
100
|
const focusRegion = normalizeRange(
|
|
39
101
|
section.focus_region
|
|
40
102
|
?? section.focusRegion
|
|
41
103
|
?? visualAction.focus_region
|
|
42
104
|
?? visualAction.focusRegion
|
|
43
105
|
);
|
|
44
|
-
const explicitAction =
|
|
45
|
-
const
|
|
106
|
+
const explicitAction = normalizeActionName(section.action ?? visualAction.type ?? visualAction.action);
|
|
107
|
+
const inferred = explicitAction || inferActionFromCameraMotion(section);
|
|
108
|
+
const targetY = pickFirstNumber(
|
|
109
|
+
section.target_y, section.to_y, section.y, section.scroll_y,
|
|
110
|
+
visualAction.target_y, visualAction.to_y, visualAction.y, visualAction.scroll_y,
|
|
111
|
+
);
|
|
112
|
+
const hasTarget = focusRegion != null || targetY != null;
|
|
113
|
+
const action = inferred || (hasTarget ? 'scroll_to_dwell' : 'hold');
|
|
46
114
|
|
|
47
115
|
return {
|
|
48
116
|
...section,
|
|
@@ -51,21 +119,42 @@ function normalizeSectionAsPhase(section = {}, index = 0) {
|
|
|
51
119
|
action,
|
|
52
120
|
focus_region: focusRegion ?? null,
|
|
53
121
|
visual_action: visualAction,
|
|
54
|
-
target_y:
|
|
55
|
-
from_y: section.from_y
|
|
56
|
-
to_y: section.to_y
|
|
57
|
-
transition_ms: section.transition_ms ?? visualAction.transition_ms ?? null,
|
|
58
|
-
duration_ms: section.duration_ms ?? section.dwell_ms ??
|
|
122
|
+
target_y: targetY,
|
|
123
|
+
from_y: pickFirstNumber(section.from_y, visualAction.from_y),
|
|
124
|
+
to_y: pickFirstNumber(section.to_y, visualAction.to_y, section.y, visualAction.y),
|
|
125
|
+
transition_ms: section.transition_ms ?? visualAction.transition_ms ?? visualAction.duration_ms ?? null,
|
|
126
|
+
duration_ms: section.duration_ms ?? section.dwell_ms ?? section.audio_duration_ms
|
|
127
|
+
?? (section.presentation && Number.isFinite(Number(section.presentation.duration))
|
|
128
|
+
? Math.round(Number(section.presentation.duration) * 1000)
|
|
129
|
+
: null),
|
|
59
130
|
};
|
|
60
131
|
}
|
|
61
132
|
|
|
62
133
|
export function normalizePlanPhases(plan = {}) {
|
|
63
|
-
const
|
|
64
|
-
if (
|
|
134
|
+
const topLevelPhases = Array.isArray(plan?.phases) ? plan.phases : [];
|
|
135
|
+
if (topLevelPhases.length > 0) {
|
|
136
|
+
return topLevelPhases.map((phase, index) => normalizeSectionAsPhase(phase, index));
|
|
137
|
+
}
|
|
65
138
|
|
|
66
139
|
const sections = Array.isArray(plan?.sections) ? plan.sections : [];
|
|
67
140
|
if (sections.length > 0) {
|
|
68
|
-
|
|
141
|
+
const flattened = [];
|
|
142
|
+
sections.forEach((section, sectionIndex) => {
|
|
143
|
+
const nested = Array.isArray(section?.phases) ? section.phases : null;
|
|
144
|
+
if (nested && nested.length > 0) {
|
|
145
|
+
const prefix = normalizeText(section.id ?? section.phase_id ?? section.name) || `s${sectionIndex + 1}`;
|
|
146
|
+
nested.forEach((subPhase, subIndex) => {
|
|
147
|
+
const merged = {
|
|
148
|
+
...subPhase,
|
|
149
|
+
id: subPhase.id ?? subPhase.phase_id ?? subPhase.name ?? `${prefix}_${subIndex + 1}`,
|
|
150
|
+
};
|
|
151
|
+
flattened.push(normalizeSectionAsPhase(merged, flattened.length));
|
|
152
|
+
});
|
|
153
|
+
} else {
|
|
154
|
+
flattened.push(normalizeSectionAsPhase(section, flattened.length));
|
|
155
|
+
}
|
|
156
|
+
});
|
|
157
|
+
return flattened;
|
|
69
158
|
}
|
|
70
159
|
|
|
71
160
|
const error = new Error('plan_phases_required');
|
|
@@ -74,13 +163,15 @@ export function normalizePlanPhases(plan = {}) {
|
|
|
74
163
|
}
|
|
75
164
|
|
|
76
165
|
function resolvePhaseAction(phase = {}) {
|
|
77
|
-
const explicit =
|
|
166
|
+
const explicit = normalizeActionName(
|
|
167
|
+
phase.action ?? phase.visual_action?.type ?? phase.visual_action?.action
|
|
168
|
+
);
|
|
78
169
|
if (explicit) return explicit;
|
|
79
170
|
return inferActionFromCameraMotion(phase);
|
|
80
171
|
}
|
|
81
172
|
|
|
82
173
|
function resolvePhaseId(phase = {}, index = 0) {
|
|
83
|
-
return normalizeText(phase.id ?? phase.phase_id) || `phase_${index + 1}`;
|
|
174
|
+
return normalizeText(phase.id ?? phase.phase_id ?? phase.name) || `phase_${index + 1}`;
|
|
84
175
|
}
|
|
85
176
|
|
|
86
177
|
function nowMs(getNowMs) {
|
|
@@ -94,9 +185,12 @@ function resolveTransitionMs(phase, fallback) {
|
|
|
94
185
|
}
|
|
95
186
|
|
|
96
187
|
function resolveTargetY(phase, fallback = null) {
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
|
|
188
|
+
const explicit = pickFirstNumber(
|
|
189
|
+
phase?.target_y, phase?.to_y, phase?.y, phase?.scroll_y,
|
|
190
|
+
phase?.visual_action?.target_y, phase?.visual_action?.to_y,
|
|
191
|
+
phase?.visual_action?.y, phase?.visual_action?.scroll_y,
|
|
192
|
+
);
|
|
193
|
+
if (explicit != null) return explicit;
|
|
100
194
|
|
|
101
195
|
const focusRegion = normalizeRange(
|
|
102
196
|
phase?.focus_region
|
|
@@ -113,6 +207,20 @@ function resolveTargetY(phase, fallback = null) {
|
|
|
113
207
|
return fallback;
|
|
114
208
|
}
|
|
115
209
|
|
|
210
|
+
/**
 * Resolve the scroll target of a phase, failing when the phase has none.
 *
 * @param {object} phase - Phase being executed.
 * @param {string} action - Action name, used only in the error message.
 * @returns {number} The value resolveTargetY produced for the phase.
 * @throws {Error} `code === 'PHASE_TARGET_Y_REQUIRED'` when resolveTargetY
 *   returns null/undefined for the phase.
 */
function requireTargetY(phase, action) {
  const landingY = resolveTargetY(phase, null);
  if (landingY != null) {
    return landingY;
  }

  const message = `phase_target_y_required: phase "${resolvePhaseId(phase)}" uses "${action}" but has no `
    + 'target_y / to_y / y or focus_region — every scroll phase must say where it lands '
    + '(there is no blind scroll)';
  const failure = new Error(message);
  failure.code = 'PHASE_TARGET_Y_REQUIRED';
  throw failure;
}
|
|
223
|
+
|
|
116
224
|
function resolveFromY(phase, fallback = null) {
|
|
117
225
|
const raw = phase?.from_y ?? phase?.visual_action?.from_y;
|
|
118
226
|
const parsed = Number(raw);
|
|
@@ -192,8 +300,8 @@ async function executeHold(page, phase) {
|
|
|
192
300
|
return { anchorY: null };
|
|
193
301
|
}
|
|
194
302
|
|
|
195
|
-
async function executeSmoothScroll(page, phase
|
|
196
|
-
const targetY =
|
|
303
|
+
async function executeSmoothScroll(page, phase) {
|
|
304
|
+
const targetY = requireTargetY(phase, 'smooth_scroll');
|
|
197
305
|
const transitionMs = resolveTransitionMs(phase, 900);
|
|
198
306
|
await animateScroll(page, {
|
|
199
307
|
targetY,
|
|
@@ -205,8 +313,8 @@ async function executeSmoothScroll(page, phase, { fallbackTargetY = null } = {})
|
|
|
205
313
|
return { anchorY: targetY };
|
|
206
314
|
}
|
|
207
315
|
|
|
208
|
-
async function executeFastScroll(page, phase
|
|
209
|
-
const targetY =
|
|
316
|
+
async function executeFastScroll(page, phase) {
|
|
317
|
+
const targetY = requireTargetY(phase, 'fast_scroll');
|
|
210
318
|
const transitionMs = resolveTransitionMs(phase, 420);
|
|
211
319
|
await animateScroll(page, {
|
|
212
320
|
targetY,
|
|
@@ -218,12 +326,9 @@ async function executeFastScroll(page, phase, { fallbackTargetY = null } = {}) {
|
|
|
218
326
|
return { anchorY: targetY };
|
|
219
327
|
}
|
|
220
328
|
|
|
221
|
-
async function executeLinearScrollDuring(page, phase, {
|
|
222
|
-
fallbackFromY = null,
|
|
223
|
-
fallbackTargetY = null,
|
|
224
|
-
} = {}) {
|
|
329
|
+
async function executeLinearScrollDuring(page, phase, { fallbackFromY = null } = {}) {
|
|
225
330
|
const fromY = resolveFromY(phase, fallbackFromY);
|
|
226
|
-
const toY =
|
|
331
|
+
const toY = requireTargetY(phase, 'linear_scroll_during');
|
|
227
332
|
const durationMs = resolveDurationMs(phase, null);
|
|
228
333
|
if (!Number.isFinite(Number(durationMs)) || Number(durationMs) <= 0) {
|
|
229
334
|
const error = new Error('linear_scroll_duration_required');
|
|
@@ -247,8 +352,8 @@ async function executeLinearScrollDuring(page, phase, {
|
|
|
247
352
|
return { anchorY: toY };
|
|
248
353
|
}
|
|
249
354
|
|
|
250
|
-
async function executeScrollToDwell(page, phase
|
|
251
|
-
const targetY =
|
|
355
|
+
async function executeScrollToDwell(page, phase) {
|
|
356
|
+
const targetY = requireTargetY(phase, 'scroll_to_dwell');
|
|
252
357
|
const transitionMs = resolveTransitionMs(phase, 820);
|
|
253
358
|
await animateScroll(page, {
|
|
254
359
|
targetY,
|
|
@@ -286,8 +391,8 @@ async function executeScrollBack(page, phase, { fallbackTargetY = 0 } = {}) {
|
|
|
286
391
|
return { anchorY: targetY };
|
|
287
392
|
}
|
|
288
393
|
|
|
289
|
-
async function executeCursorFocus(page, phase
|
|
290
|
-
const targetY =
|
|
394
|
+
async function executeCursorFocus(page, phase) {
|
|
395
|
+
const targetY = requireTargetY(phase, 'cursor_focus');
|
|
291
396
|
const transitionMs = resolveTransitionMs(phase, 650);
|
|
292
397
|
await animateScroll(page, {
|
|
293
398
|
targetY,
|
|
@@ -313,34 +418,34 @@ async function executePhase(page, phase, {
|
|
|
313
418
|
initialAnchorY = 0,
|
|
314
419
|
} = {}) {
|
|
315
420
|
const action = resolvePhaseAction(phase);
|
|
316
|
-
const
|
|
421
|
+
const fallbackFromY = lastAnchorY ?? initialAnchorY;
|
|
317
422
|
|
|
318
423
|
if (action === 'hold') {
|
|
319
424
|
return executeHold(page, phase);
|
|
320
425
|
}
|
|
321
426
|
if (action === 'smooth_scroll') {
|
|
322
|
-
return executeSmoothScroll(page, phase
|
|
427
|
+
return executeSmoothScroll(page, phase);
|
|
323
428
|
}
|
|
324
429
|
if (action === 'fast_scroll') {
|
|
325
|
-
return executeFastScroll(page, phase
|
|
430
|
+
return executeFastScroll(page, phase);
|
|
326
431
|
}
|
|
327
432
|
if (action === 'linear_scroll_during') {
|
|
328
|
-
return executeLinearScrollDuring(page, phase, {
|
|
329
|
-
fallbackFromY: fallbackY,
|
|
330
|
-
fallbackTargetY: fallbackY,
|
|
331
|
-
});
|
|
433
|
+
return executeLinearScrollDuring(page, phase, { fallbackFromY });
|
|
332
434
|
}
|
|
333
435
|
if (action === 'scroll_to_dwell') {
|
|
334
|
-
return executeScrollToDwell(page, phase
|
|
436
|
+
return executeScrollToDwell(page, phase);
|
|
335
437
|
}
|
|
336
438
|
if (action === 'scroll_back') {
|
|
337
439
|
return executeScrollBack(page, phase, { fallbackTargetY: 0 });
|
|
338
440
|
}
|
|
339
441
|
if (action === 'cursor_focus') {
|
|
340
|
-
return executeCursorFocus(page, phase
|
|
442
|
+
return executeCursorFocus(page, phase);
|
|
341
443
|
}
|
|
342
444
|
|
|
343
|
-
const error = new Error(
|
|
445
|
+
const error = new Error(
|
|
446
|
+
`phase_action_unsupported:${action || 'empty'} — supported actions: ${SUPPORTED_PHASE_ACTIONS.join(', ')}`
|
|
447
|
+
+ ' (there is no blind scroll_down/scroll_up; use scroll_to_dwell with target_y or focus_region)',
|
|
448
|
+
);
|
|
344
449
|
error.code = 'PHASE_ACTION_UNSUPPORTED';
|
|
345
450
|
throw error;
|
|
346
451
|
}
|
package/src/chat-bridge.js
CHANGED
|
@@ -1430,10 +1430,10 @@ server.tool('get_library_file',
|
|
|
1430
1430
|
|
|
1431
1431
|
// ── record_url_narration ────────────────────────────────────────────────────────
|
|
1432
1432
|
server.tool('record_url_narration',
|
|
1433
|
-
'Record a silent video of a URL by
|
|
1433
|
+
'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
|
|
1434
1434
|
{
|
|
1435
1435
|
url: z.string().describe('Page URL to record'),
|
|
1436
|
-
plan: z.record(z.any()).describe('
|
|
1436
|
+
plan: z.record(z.any()).describe('A video plan: an object with `phases` (or `sections`), each a "visual beat" with `action` (scroll_to_dwell / linear_scroll_during / scroll_back / hold / ...), a target (`target_y` or `focus_region:[y1,y2]`) for scroll-type actions, and `dwell_ms` (how long to hold that beat — should match the segment\'s TTS duration). It can be hand-written or the output of plan_video_segments (whose returned segments array doubles as a valid plan).'),
|
|
1437
1437
|
output_path: z.string().optional().describe('Workspace-relative output mp4 path. Default tmp/wx3_video/recorded-{ts}.mp4'),
|
|
1438
1438
|
events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${output_path}.events.json'),
|
|
1439
1439
|
viewport: z.object({
|
|
@@ -1468,7 +1468,7 @@ server.tool('submit_to_library',
|
|
|
1468
1468
|
target_platform: z.string().optional().describe('目标发布平台,如 xhs / douyin'),
|
|
1469
1469
|
metadata: z.record(z.any()).optional().describe('其它 metadata(brand_voice / persona / account / goal_state 等)'),
|
|
1470
1470
|
understanding: z.record(z.any()).optional().describe('analyze_page 输出'),
|
|
1471
|
-
plan: z.record(z.any()).optional().describe('
|
|
1471
|
+
plan: z.record(z.any()).optional().describe('plan_video_segments 输出(或手写的录屏 plan)'),
|
|
1472
1472
|
},
|
|
1473
1473
|
async (args) => {
|
|
1474
1474
|
if (isBlockedCvmaxEditorVideoTool('submit_to_library')) {
|
|
@@ -1529,7 +1529,7 @@ server.tool('request_approval',
|
|
|
1529
1529
|
platform: z.string().describe('Target platform, e.g. "x", "xhs", "email"'),
|
|
1530
1530
|
description: z.string().describe('Human-readable summary of what will happen if approved'),
|
|
1531
1531
|
payload: z.record(z.any()).describe('Full action parameters (content, media_urls, etc.)'),
|
|
1532
|
-
credential_id: z.string().optional().describe('Which account/credential to use.
|
|
1532
|
+
credential_id: z.string().optional().describe('Which account/credential to use. Accepts a workspace account_id, a real credential UUID, the account display name, or a role alias (主号/main/primary, 矩阵号/matrix/secondary, 测试号/test/incubator) — any value works as long as it uniquely matches one workspace account on the target platform. If publishing fails with publish_account_selection_required/ambiguous, pick a value from the returned candidates\' "selectors" list yourself instead of asking the user to re-type an account name.'),
|
|
1533
1533
|
},
|
|
1534
1534
|
async ({ action_type, platform, description, payload, credential_id }) => {
|
|
1535
1535
|
try {
|
|
@@ -45,6 +45,20 @@ function planDurationSec(audioDurationMs, bufferSec = 0.5) {
|
|
|
45
45
|
return Math.ceil(raw * 2) / 2; // round up to nearest 0.5s
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
// Run fn over items with a bounded number of concurrent workers (FIFO drain).
|
|
49
|
+
async function mapWithConcurrency(items, limit, fn) {
|
|
50
|
+
const queue = items.map((item, index) => ({ item, index }));
|
|
51
|
+
const workers = Array.from({ length: Math.max(1, Math.min(limit, queue.length)) }, async () => {
|
|
52
|
+
while (queue.length > 0) {
|
|
53
|
+
const next = queue.shift();
|
|
54
|
+
await fn(next.item, next.index);
|
|
55
|
+
}
|
|
56
|
+
});
|
|
57
|
+
await Promise.all(workers);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const TTS_CONCURRENCY = 5;
|
|
61
|
+
|
|
48
62
|
export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_id, currentWorkspaceId, api }) {
|
|
49
63
|
if (!Array.isArray(segments) || segments.length === 0) {
|
|
50
64
|
return toolError('segments must be a non-empty array.');
|
|
@@ -58,20 +72,31 @@ export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_i
|
|
|
58
72
|
const planned = [];
|
|
59
73
|
const errors = [];
|
|
60
74
|
|
|
75
|
+
// Synthesize TTS for every text-bearing segment up front, in parallel (bounded),
|
|
76
|
+
// so an N-segment plan no longer pays N sequential round-trips to the TTS API.
|
|
77
|
+
const audioResults = new Array(segments.length).fill(null);
|
|
78
|
+
const ttsJobs = segments
|
|
79
|
+
.map((seg, i) => ({ i, text: String(seg.text ?? '').trim() }))
|
|
80
|
+
.filter(job => job.text);
|
|
81
|
+
await mapWithConcurrency(ttsJobs, TTS_CONCURRENCY, async ({ i, text }) => {
|
|
82
|
+
try {
|
|
83
|
+
audioResults[i] = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
|
|
84
|
+
} catch (err) {
|
|
85
|
+
errors.push(`segments[${i}]: TTS failed — ${err.message}`);
|
|
86
|
+
audioResults[i] = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
errors.sort((a, b) => {
|
|
90
|
+
const na = Number((a.match(/segments\[(\d+)\]/) ?? [])[1] ?? 0);
|
|
91
|
+
const nb = Number((b.match(/segments\[(\d+)\]/) ?? [])[1] ?? 0);
|
|
92
|
+
return na - nb;
|
|
93
|
+
});
|
|
94
|
+
|
|
61
95
|
for (let i = 0; i < segments.length; i++) {
|
|
62
96
|
const seg = segments[i];
|
|
63
97
|
const text = String(seg.text ?? '').trim();
|
|
64
98
|
const kind = String(seg.visual_kind ?? 'image');
|
|
65
|
-
|
|
66
|
-
let audioResult = null;
|
|
67
|
-
if (text) {
|
|
68
|
-
try {
|
|
69
|
-
audioResult = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
|
|
70
|
-
} catch (err) {
|
|
71
|
-
errors.push(`segments[${i}]: TTS failed — ${err.message}`);
|
|
72
|
-
audioResult = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
|
|
73
|
-
}
|
|
74
|
-
}
|
|
99
|
+
const audioResult = audioResults[i];
|
|
75
100
|
|
|
76
101
|
const audioDurationMs = audioResult?.audio_duration_ms ?? 0;
|
|
77
102
|
let presentation;
|
|
@@ -87,11 +112,19 @@ export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_i
|
|
|
87
112
|
presentation = { duration, ...(kind === 'scroll' ? { style: 'scroll' } : {}) };
|
|
88
113
|
}
|
|
89
114
|
|
|
115
|
+
// dwell_ms lets the same segment double as a record_url_narration plan phase
|
|
116
|
+
// (the recorder reads dwell_ms / duration_ms for how long to hold each beat).
|
|
117
|
+
// Prefer the real measured audio length; fall back to the planned visual duration.
|
|
118
|
+
const dwellMs = audioDurationMs > 0
|
|
119
|
+
? audioDurationMs
|
|
120
|
+
: Math.round((presentation.duration ?? presentation.per_card_duration ?? 4) * 1000);
|
|
121
|
+
|
|
90
122
|
const planned_seg = {
|
|
91
123
|
...seg,
|
|
92
124
|
...(audioResult?.audio_path ? { audio_path: audioResult.audio_path } : {}),
|
|
93
125
|
...(text ? { subtitle_text: text } : {}),
|
|
94
126
|
presentation: { ...presentation, ...(seg.presentation ?? {}) },
|
|
127
|
+
dwell_ms: seg.dwell_ms ?? dwellMs,
|
|
95
128
|
};
|
|
96
129
|
if (audioResult?.audio_duration_ms) {
|
|
97
130
|
planned_seg.audio_duration_ms = audioResult.audio_duration_ms;
|