@lightcone-ai/daemon 0.15.47 → 0.15.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -9,6 +9,53 @@ const DEFAULT_WIDTH = 1080;
|
|
|
9
9
|
const DEFAULT_HEIGHT = 1920;
|
|
10
10
|
const DEFAULT_FPS = 30;
|
|
11
11
|
const TRANSITION_DURATION = 0.5;
|
|
12
|
+
const SUBTITLE_FONT = 'PingFang SC,Microsoft YaHei,Arial';
|
|
13
|
+
const SUBTITLE_FONT_SIZE = 72;
|
|
14
|
+
const SUBTITLE_MARGIN_V = 120;
|
|
15
|
+
|
|
16
|
+
/**
 * Format a millisecond offset as an ASS timestamp (`H:MM:SS.cc`).
 *
 * The value is rounded to the nearest centisecond. Negative values clamp to
 * zero, and non-finite input (`undefined`, `NaN`, `±Infinity`) is treated as
 * zero so a bad cue can never emit `NaN:NaN:NaN.NaN` into the script.
 *
 * @param {number} ms - Offset from the start of the video, in milliseconds.
 * @returns {string} Timestamp such as `0:00:01.23`.
 */
function msToAssTimestamp(ms) {
  const value = Number(ms);
  const safeMs = Number.isFinite(value) ? Math.max(0, value) : 0;

  const totalCs = Math.round(safeMs / 10);
  const cs = totalCs % 100;
  const totalSec = Math.floor(totalCs / 100);
  const sec = totalSec % 60;
  const min = Math.floor(totalSec / 60) % 60;
  const hr = Math.floor(totalSec / 3600);

  const pad2 = (n) => String(n).padStart(2, '0');
  // Hours are intentionally not padded: ASS uses a single-digit hour field.
  return `${hr}:${pad2(min)}:${pad2(sec)}.${pad2(cs)}`;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Hard-wrap subtitle text into lines of at most `maxChars` characters, joined
 * with the ASS line-break marker `\N`.
 *
 * Characters are counted by Unicode code point, so CJK text and astral
 * characters (e.g. emoji) are never split mid-character.
 *
 * @param {unknown} text - Text to wrap; `null`/`undefined` become an empty string.
 * @param {number} [maxChars=14] - Maximum characters per line. Values below 1
 *   are clamped to 1 and fractions are floored; `NaN` falls back to 14.
 * @returns {string} Wrapped text with `\N` between lines.
 */
function wrapSubtitleText(text, maxChars = 14) {
  const chars = Array.from(String(text ?? ''));

  // A width of 0 or less would never advance the loop below (infinite loop and
  // unbounded memory growth), so force a usable positive integer width.
  const requested = Number(maxChars);
  const width = Number.isNaN(requested) ? 14 : Math.max(1, Math.floor(requested));

  if (chars.length <= width) return chars.join('');

  const lines = [];
  for (let i = 0; i < chars.length; i += width) {
    lines.push(chars.slice(i, i + width).join(''));
  }
  return lines.join('\\N');
}
|
|
35
|
+
|
|
36
|
+
/**
 * Build the text of an ASS (Advanced SubStation Alpha) subtitle script.
 *
 * @param {Array<{text: string, start_ms: number, end_ms: number}>} subtitles
 *   Cues to emit, with timestamps in milliseconds on the final timeline.
 * @param {{playResX?: number, playResY?: number}} [options] - Script
 *   resolution; defaults to DEFAULT_WIDTH x DEFAULT_HEIGHT.
 * @returns {string} Complete `.ass` file content, terminated by a newline.
 */
function buildAssContent(subtitles, { playResX = DEFAULT_WIDTH, playResY = DEFAULT_HEIGHT } = {}) {
  // A Style line is comma-delimited with no escaping, so a CSS-like fallback
  // list ("A,B,C") would shift every following field out of place. Only the
  // first family is emitted.
  const fontName = String(SUBTITLE_FONT).split(',')[0].trim() || 'Arial';

  // 60 = MarginL + MarginR (30 each, see the Style line). Clamp to at least one
  // character per line so a very small playResX can never yield a zero or
  // negative wrap width.
  const maxChars = Math.max(1, Math.floor((playResX - 60) / SUBTITLE_FONT_SIZE));

  const header = [
    '[Script Info]',
    'ScriptType: v4.00+',
    `PlayResX: ${playResX}`,
    `PlayResY: ${playResY}`,
    // WrapStyle 2 disables automatic wrapping; only explicit \N breaks apply.
    'WrapStyle: 2',
    '',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    `Style: Default,${fontName},${SUBTITLE_FONT_SIZE},&H00FFFFFF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,4,0,2,30,30,${SUBTITLE_MARGIN_V},1`,
    '',
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
  ].join('\n');

  const events = (subtitles ?? []).map(({ text, start_ms, end_ms }) => {
    // Wrap each explicit line on its own so existing line breaks are not
    // counted as characters, then join everything with the ASS \N marker.
    const safe = String(text ?? '')
      .split(/\r?\n/)
      .map((line) => wrapSubtitleText(line, maxChars))
      .join('\\N');
    // Text is the last Dialogue field, so commas in it are literal and need no
    // escaping. Wrapping them in "{...}" would create an override block, whose
    // contents are not rendered, i.e. the commas would disappear on screen.
    return `Dialogue: 0,${msToAssTimestamp(start_ms)},${msToAssTimestamp(end_ms)},Default,,0,0,0,,${safe}`;
  });

  return `${header}\n${events.join('\n')}\n`;
}
|
|
12
59
|
|
|
13
60
|
async function fileExists(p) {
|
|
14
61
|
try { await access(p, fsConstants.R_OK); return true; } catch { return false; }
|
|
@@ -245,7 +292,8 @@ export async function composeVideoV2({
|
|
|
245
292
|
finalClip = await silentClip({ videoPath: visualClip.path, duration: visualClip.duration, tmpDir });
|
|
246
293
|
}
|
|
247
294
|
|
|
248
|
-
|
|
295
|
+
const subtitleText = typeof seg.subtitle_text === 'string' ? seg.subtitle_text.trim() : '';
|
|
296
|
+
readyClips.push({ path: finalClip, duration: visualClip.duration, transition, subtitleText });
|
|
249
297
|
}
|
|
250
298
|
|
|
251
299
|
const outroClips = [];
|
|
@@ -256,6 +304,20 @@ export async function composeVideoV2({
|
|
|
256
304
|
}
|
|
257
305
|
}
|
|
258
306
|
|
|
307
|
+
// Build subtitle entries with cumulative timeline timestamps
|
|
308
|
+
let cursorMs = 0;
|
|
309
|
+
const subtitleEntries = [];
|
|
310
|
+
for (const clip of readyClips) {
|
|
311
|
+
if (clip.subtitleText) {
|
|
312
|
+
subtitleEntries.push({
|
|
313
|
+
text: clip.subtitleText,
|
|
314
|
+
start_ms: cursorMs,
|
|
315
|
+
end_ms: cursorMs + Math.round(clip.duration * 1000),
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
cursorMs += Math.round(clip.duration * 1000);
|
|
319
|
+
}
|
|
320
|
+
|
|
259
321
|
const allClips = [];
|
|
260
322
|
let accumulated = readyClips[0].path;
|
|
261
323
|
for (let i = 1; i < readyClips.length; i++) {
|
|
@@ -271,10 +333,29 @@ export async function composeVideoV2({
|
|
|
271
333
|
|
|
272
334
|
const finalSequence = [...allClips, ...outroClips];
|
|
273
335
|
|
|
336
|
+
// Compose without subtitles first (subtitles are burned in a separate pass)
|
|
337
|
+
const preSubPath = subtitleEntries.length > 0
|
|
338
|
+
? path.join(tmpDir, `pre-sub-${randomUUID().slice(0, 8)}.mp4`)
|
|
339
|
+
: outPath;
|
|
340
|
+
|
|
274
341
|
if (finalSequence.length === 1) {
|
|
275
|
-
await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart',
|
|
342
|
+
await runFfmpeg(['-i', finalSequence[0], '-c', 'copy', '-movflags', '+faststart', preSubPath], 'ffmpeg copy');
|
|
276
343
|
} else {
|
|
277
|
-
await concatWithCuts({ clips: finalSequence, outputPath:
|
|
344
|
+
await concatWithCuts({ clips: finalSequence, outputPath: preSubPath });
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
// Burn subtitles into final output
|
|
348
|
+
if (subtitleEntries.length > 0) {
|
|
349
|
+
const assPath = path.join(tmpDir, `subs-${randomUUID().slice(0, 8)}.ass`);
|
|
350
|
+
await writeFile(assPath, buildAssContent(subtitleEntries, { playResX: width, playResY: height }));
|
|
351
|
+
const escapedAssPath = assPath.replace(/\\/g, '/').replace(/:/g, '\\:').replace(/'/g, "\\'");
|
|
352
|
+
await runFfmpeg([
|
|
353
|
+
'-i', preSubPath,
|
|
354
|
+
'-vf', `subtitles='${escapedAssPath}'`,
|
|
355
|
+
'-c:a', 'copy',
|
|
356
|
+
'-movflags', '+faststart',
|
|
357
|
+
outPath,
|
|
358
|
+
], 'ffmpeg burn-subtitles');
|
|
278
359
|
}
|
|
279
360
|
|
|
280
361
|
const totalDuration = await probeDurationSec(outPath);
|
package/src/chat-bridge.js
CHANGED
|
@@ -16,6 +16,7 @@ import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
|
|
|
16
16
|
import { runRenderTextToImageTool } from './tools/render-text-to-image.js';
|
|
17
17
|
import { runRenderHtmlToImageTool } from './tools/render-html-to-image.js';
|
|
18
18
|
import { runSynthesisTtsTool } from './tools/synthesize-tts.js';
|
|
19
|
+
import { runPlanVideoSegmentsTool } from './tools/plan-video-segments.js';
|
|
19
20
|
import { runComposeVideoV2Tool } from './tools/compose-video-v2.js';
|
|
20
21
|
import { runTakePageScreenshotTool } from './tools/take-page-screenshot.js';
|
|
21
22
|
import { runGetLibraryFileTool } from './tools/get-library-file.js';
|
|
@@ -1357,24 +1358,44 @@ server.tool('synthesize_tts',
|
|
|
1357
1358
|
async (args) => runSynthesisTtsTool({ ...args, currentWorkspaceId, api })
|
|
1358
1359
|
);
|
|
1359
1360
|
|
|
1361
|
+
// ── plan_video_segments ────────────────────────────────────────────────────────
|
|
1362
|
+
server.tool('plan_video_segments',
|
|
1363
|
+
'Universal audio-video sync planning step. For each segment, call TTS to get the real audio duration, then compute the visual duration with a safety buffer. Returns a planned segments array ready to pass directly to compose_video_v2 (with audio_path, presentation.duration/per_card_duration, and subtitle_text pre-filled). Always call this before compose_video_v2 when you have narration text.',
|
|
1364
|
+
{
|
|
1365
|
+
segments: z.array(z.object({
|
|
1366
|
+
text: z.string().describe('Narration text for this segment. TTS will be generated from this.'),
|
|
1367
|
+
visual_kind: z.enum(['image', 'video', 'gif', 'carousel']).describe('Type of visual.'),
|
|
1368
|
+
visual_path: z.string().optional().describe('Absolute path to a single image, video, or gif file.'),
|
|
1369
|
+
visual_paths: z.array(z.string()).optional().describe('For carousel: array of image paths, one per card.'),
|
|
1370
|
+
transition: z.enum(['cut', 'fade', 'crossfade']).optional().describe('Transition to next segment. Default cut.'),
|
|
1371
|
+
presentation: z.object({
|
|
1372
|
+
style: z.enum(['static', 'scroll']).optional(),
|
|
1373
|
+
}).optional().describe('Partial presentation hints (style only). duration/per_card_duration are computed from TTS.'),
|
|
1374
|
+
})).describe('Segments to plan. Each must have narration text and visual info.'),
|
|
1375
|
+
voice_id: z.string().optional().describe('TTS voice ID. Omit to use workspace default.'),
|
|
1376
|
+
workspace_id: z.string().optional().describe('Target workspace. Defaults to current workspace context.'),
|
|
1377
|
+
},
|
|
1378
|
+
async (args) => runPlanVideoSegmentsTool({ ...args, currentWorkspaceId, api })
|
|
1379
|
+
);
|
|
1380
|
+
|
|
1360
1381
|
// ── compose_video_v2 ───────────────────────────────────────────────────────────
|
|
1361
1382
|
server.tool('compose_video_v2',
|
|
1362
|
-
'Compose a video from a list of segments using ffmpeg. Each segment has a visual source (image/scroll/carousel/video/gif) and optional
|
|
1383
|
+
'Compose a video from a list of segments using ffmpeg. Each segment has a visual source (image/scroll/carousel/video/gif), optional audio, and optional subtitle text. Subtitles are burned into the video by default when subtitle_text is provided. Segments are concatenated in order; outro clips are appended at the end. Returns a local mp4 path.\n\nTypical flow: plan_video_segments → compose_video_v2 (segments output fed directly in).',
|
|
1363
1384
|
{
|
|
1364
1385
|
segments: z.array(z.object({
|
|
1365
1386
|
visual_path: z.string().optional().describe('Absolute path to a single image, video, or gif file.'),
|
|
1366
1387
|
visual_paths: z.array(z.string()).optional().describe('For carousel: array of image paths, one per card.'),
|
|
1367
|
-
visual_kind: z.enum(['image', 'video', 'gif', 'carousel']).describe('Type of visual.
|
|
1388
|
+
visual_kind: z.enum(['image', 'video', 'gif', 'carousel']).describe('Type of visual.'),
|
|
1368
1389
|
presentation: z.object({
|
|
1369
|
-
style: z.enum(['static', 'scroll']).optional().describe('For image: static (default) or scroll (pan upward).
|
|
1370
|
-
duration: z.number().optional().describe('Segment duration in seconds. Required for image/scroll.
|
|
1390
|
+
style: z.enum(['static', 'scroll']).optional().describe('For image: static (default) or scroll (pan upward).'),
|
|
1391
|
+
duration: z.number().optional().describe('Segment duration in seconds. Required for image/scroll.'),
|
|
1371
1392
|
per_card_duration: z.number().optional().describe('Seconds per card for carousel.'),
|
|
1372
1393
|
}).optional(),
|
|
1373
|
-
audio_path: z.string().nullable().optional().describe('Absolute path to
|
|
1394
|
+
audio_path: z.string().nullable().optional().describe('Absolute path to audio (mp3). null or omit for silence.'),
|
|
1395
|
+
subtitle_text: z.string().optional().describe('Narration text to burn as subtitle for this segment. Displayed for the full segment duration.'),
|
|
1374
1396
|
transition: z.enum(['cut', 'fade', 'crossfade']).optional().describe('Transition to next segment. Default cut.'),
|
|
1375
1397
|
})).describe('Ordered list of video segments.'),
|
|
1376
1398
|
outro_paths: z.array(z.string()).optional().describe('Absolute paths to outro video clips appended after all segments.'),
|
|
1377
|
-
format: z.string().optional().describe('Aspect ratio. Default "9:16".'),
|
|
1378
1399
|
resolution: z.string().optional().describe('Output resolution WxH. Default "1080x1920".'),
|
|
1379
1400
|
output_path: z.string().optional().describe('Absolute output path for the mp4. Auto-generated if omitted.'),
|
|
1380
1401
|
},
|
|
@@ -1595,10 +1616,10 @@ server.tool('execute_approved_action',
|
|
|
1595
1616
|
|
|
1596
1617
|
// ── promote_context ───────────────────────────────────────────────────────────
|
|
1597
1618
|
server.tool('promote_context',
|
|
1598
|
-
'Submit a
|
|
1619
|
+
'Submit a candidate for human review to add to the workspace\'s persistent context. Use this (1) after finishing a task when you discover a stable fact, convention, or learning future agents should know, or (2) immediately after responding to a user message when the user expressed a persistent operational preference (e.g. "always add links", "use subtitles by default"). The candidate appears in the workspace owner\'s "My Context → Pending Proposals" panel; once confirmed, it is auto-injected into every future agent\'s system prompt under "## Workspace context". This is the only sanctioned path for shared knowledge or preference governance.',
|
|
1599
1620
|
{
|
|
1600
1621
|
workspace_id: z.string().optional().describe('Target workspace id. Defaults to your current workspace if omitted.'),
|
|
1601
|
-
type: z.enum(['knowledge', 'workspace_norm', 'memory']).optional().describe('Candidate type
|
|
1622
|
+
type: z.enum(['knowledge', 'workspace_norm', 'memory', 'preference']).optional().describe('Candidate type: "knowledge" for facts/learnings, "workspace_norm" for standing rules, "memory" for durable facts, "preference" for persistent user preferences expressed in conversation.'),
|
|
1602
1623
|
summary: z.string().describe('One-line title that reviewers will see in the Pending Proposals list.'),
|
|
1603
1624
|
content: z.string().describe('Full candidate text that future agents will read. Be concrete, citable, and self-contained.'),
|
|
1604
1625
|
source_message_id: z.string().optional().describe('Optional message id that motivated this candidate, for audit trail.'),
|
|
@@ -1627,11 +1648,12 @@ server.tool('promote_context',
|
|
|
1627
1648
|
reason,
|
|
1628
1649
|
});
|
|
1629
1650
|
const proposal = data?.proposal ?? {};
|
|
1651
|
+
const resolvedType = type ?? 'knowledge';
|
|
1630
1652
|
return {
|
|
1631
1653
|
content: [{
|
|
1632
1654
|
type: 'text',
|
|
1633
1655
|
text:
|
|
1634
|
-
|
|
1656
|
+
`${resolvedType === 'preference' ? 'Preference' : 'Knowledge'} candidate submitted.\n` +
|
|
1635
1657
|
`proposal_id=${proposal.id ?? 'unknown'} workspace=${proposal.workspaceId ?? targetWorkspaceId} status=${proposal.status ?? 'candidate'}\n` +
|
|
1636
1658
|
`It is now visible in the workspace owner's "My Context → Pending Proposals" panel; once confirmed, it will be injected into every future agent's "## Workspace context".`,
|
|
1637
1659
|
}],
|
package/src/drivers/claude.js
CHANGED
|
@@ -238,7 +238,7 @@ The active workspace context (Goal State, constraints, decisions, knowledge) is
|
|
|
238
238
|
|
|
239
239
|
**Write rule:**
|
|
240
240
|
- Personal learnings → \`${t("write_memory")}\`
|
|
241
|
-
- Workspace-level knowledge
|
|
241
|
+
- Workspace-level knowledge or persistent user preferences → \`${t("promote_context")}\` (see below). Do **not** dump shared knowledge into ad-hoc files inside the workspace shared workspace.
|
|
242
242
|
- **Any file you produce for a task** → \`${t("write_workspace")}({ path: "artifacts/your-file.ext", ... })\` or \`${t("write_workspace_file")}({ file_path, path: "artifacts/your-file.ext" })\`
|
|
243
243
|
|
|
244
244
|
Temporary local files belong under \`tmp/\` in your personal workspace. If you need to show an image in chat, first save the durable copy to \`artifacts/\`, then optionally call \`${t("upload_image")}\` for a temporary public preview URL.
|
|
@@ -258,8 +258,8 @@ Example: writing a web page → \`${t("write_workspace")}({ path: "artifacts/job
|
|
|
258
258
|
- \`${t("write_workspace_file")}({ file_path, path })\` — write a local file from your workspace to a workspace artifact without putting base64 in context
|
|
259
259
|
- \`${t("list_workspace")}()\` — list all files in the workspace
|
|
260
260
|
|
|
261
|
-
**Workspace knowledge governance:**
|
|
262
|
-
- \`${t("promote_context")}({ workspace_id, type, summary, content })\` — submit a
|
|
261
|
+
**Workspace knowledge & preference governance:**
|
|
262
|
+
- \`${t("promote_context")}({ workspace_id, type, summary, content })\` — submit a candidate for human review. Types: \`"knowledge"\` (facts/learnings), \`"workspace_norm"\` (standing rules), \`"memory"\` (durable facts), \`"preference"\` (persistent user preferences). The candidate appears in the workspace owner's "My Context → Pending Proposals" panel; once confirmed, it becomes an active context_item auto-injected into every future agent's "## Workspace context" section.
|
|
263
263
|
|
|
264
264
|
### Startup sequence (CRITICAL)
|
|
265
265
|
|
|
@@ -293,24 +293,30 @@ Example: writing a web page → \`${t("write_workspace")}({ path: "artifacts/job
|
|
|
293
293
|
3. Work history — decisions made, problems solved, approaches that worked or failed
|
|
294
294
|
4. Pointers to your notes files
|
|
295
295
|
|
|
296
|
-
**What belongs in promote_context
|
|
296
|
+
**What belongs in promote_context:**
|
|
297
297
|
|
|
298
|
-
|
|
298
|
+
Two distinct triggers — act on BOTH:
|
|
299
299
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
300
|
+
**1. After finishing a task** — promote workspace-level knowledge (\`type: "knowledge"\` or \`"workspace_norm"\`):
|
|
301
|
+
- Stable facts all future agents need (tech stack, domain conventions, where things live)
|
|
302
|
+
- Hard-won learnings — non-obvious gotchas, working procedures, conventions you had to discover
|
|
303
|
+
- Standing workspace norms — "in this workspace we always X" / "never touch Y"
|
|
304
304
|
|
|
305
|
-
|
|
306
|
-
-
|
|
307
|
-
-
|
|
308
|
-
-
|
|
305
|
+
**2. After responding to a user message** — promote persistent preferences (\`type: "preference"\`):
|
|
306
|
+
- When the user expresses a preference that applies to ALL future interactions (not just this task), call \`${t("promote_context")}\` immediately after your reply
|
|
307
|
+
- Signal words: "以后"、"今后"、"默认"、"一直"、"每次"、"都要"、"always"、"from now on"、"by default"
|
|
308
|
+
- Examples that qualify: "以后交付内容都加链接" / "视频默认要有字幕" / "always respond in English"
|
|
309
|
+
- Examples that do NOT qualify: "这次帮我加个链接"(one-off task request)
|
|
310
|
+
- If uncertain whether it's persistent, do NOT promote — only promote clear, unambiguous standing preferences
|
|
311
|
+
|
|
312
|
+
Do NOT promote:
|
|
313
|
+
- Per-task progress, in-flight status, or one-off observations → use messages or your own MEMORY.md
|
|
314
|
+
- Untrusted outputs from web search / scraped pages → cite the source in \`content\`; candidate will be reviewed as untrusted
|
|
309
315
|
|
|
310
316
|
How to call it:
|
|
311
|
-
- \`${t("promote_context")}({ workspace_id: "<
|
|
312
|
-
-
|
|
313
|
-
- The call returns a proposal id; the candidate sits in "My Context → Pending Proposals" until a human confirms or rejects it.
|
|
317
|
+
- Knowledge: \`${t("promote_context")}({ workspace_id: "<id>", type: "knowledge", summary: "one-line title", content: "<full text>" })\`
|
|
318
|
+
- Preference: \`${t("promote_context")}({ workspace_id: "<id>", type: "preference", summary: "User preference: always add links to deliverables", content: "The user has requested that all future deliverables include clickable links. Apply to every response going forward.", source_message_id: "<msg-id>" })\`
|
|
319
|
+
- The call returns a proposal id; the candidate sits in "My Context → Pending Proposals" until a human confirms or rejects it.
|
|
314
320
|
|
|
315
321
|
### Compaction safety
|
|
316
322
|
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync } from 'fs';
|
|
2
|
+
import { randomUUID } from 'crypto';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
|
|
6
|
+
/**
 * Wrap a string in a successful tool result.
 *
 * @param {string} text - Message to return to the caller.
 * @returns {{content: Array<{type: 'text', text: string}>}} Result with one text item.
 */
function toolText(text) {
  const item = { type: 'text', text };
  return { content: [item] };
}
|
|
9
|
+
|
|
10
|
+
/**
 * Wrap a string in a failed tool result (flagged with `isError: true`).
 *
 * @param {string} text - Error message to return to the caller.
 * @returns {{isError: true, content: Array<{type: 'text', text: string}>}} Error result with one text item.
 */
function toolError(text) {
  const item = { type: 'text', text };
  return { isError: true, content: [item] };
}
|
|
13
|
+
|
|
14
|
+
/**
 * Pick a file extension for a downloaded audio file from its URL.
 *
 * The query string and the fragment are both ignored, so a URL such as
 * `.../clip.wav#t=3` or `.../clip.wav?sig=abc` still resolves to `.wav`.
 *
 * @param {unknown} url - Remote audio URL (or path); `null`/`undefined` allowed.
 * @returns {string} One of `.mp3`, `.wav`, `.flac`, `.aac`, `.ogg`; `.mp3` when
 *   the extension is missing or not in that list.
 */
function inferAudioExt(url) {
  const withoutQueryOrFragment = String(url ?? '').split(/[?#]/)[0];
  const ext = path.extname(withoutQueryOrFragment).toLowerCase();
  return ['.mp3', '.wav', '.flac', '.aac', '.ogg'].includes(ext) ? ext : '.mp3';
}
|
|
19
|
+
|
|
20
|
+
/**
 * Synthesize narration for one segment and save the audio to a local temp file.
 *
 * Posts the text to `/tts/voiceover`, downloads the returned `audio_url`, and
 * writes it under `<os tmpdir>/lightcone-tts/`.
 *
 * @param {string} text - Narration text to synthesize.
 * @param {{workspace_id: string, voice_id?: string, api: Function}} options
 *   `api(method, path, payload)` performs the backend request.
 * @returns {Promise<{audio_path: string, audio_duration_ms: number}>} Local file
 *   path and the duration reported by the API (0 when it is missing or not a
 *   finite number).
 * @throws {Error} If the API returns no `audio_url` or the download fails.
 */
async function synthesizeSegmentTts(text, { workspace_id, voice_id, api }) {
  const payload = { workspace_id, text, speed: 1, format: 'mp3' };
  // Trim before testing so a whitespace-only voice id is not sent as an empty preset.
  const voicePreset = String(voice_id ?? '').trim();
  if (voicePreset) payload.voice_preset = voicePreset;

  const data = await api('POST', '/tts/voiceover', payload);
  // Optional chaining: an empty (null/undefined) response should raise the
  // descriptive error below instead of an opaque TypeError.
  const remoteAudioUrl = String(data?.audio_url ?? '').trim();
  if (!remoteAudioUrl) throw new Error('TTS API did not return audio_url');

  const downloadRes = await fetch(remoteAudioUrl);
  if (!downloadRes.ok) throw new Error(`Failed to download audio (${downloadRes.status})`);

  const fileBuffer = Buffer.from(await downloadRes.arrayBuffer());
  const outDir = path.join(os.tmpdir(), 'lightcone-tts');
  mkdirSync(outDir, { recursive: true });
  const ext = inferAudioExt(remoteAudioUrl);
  // Timestamp plus a random suffix keeps concurrent calls from colliding.
  const outPath = path.join(outDir, `tts-${Date.now()}-${randomUUID().slice(0, 8)}${ext}`);
  writeFileSync(outPath, fileBuffer);

  // Never hand NaN to callers that do arithmetic on the duration.
  const reportedMs = Number(data?.duration_ms ?? 0);
  const durationMs = Number.isFinite(reportedMs) ? reportedMs : 0;
  return { audio_path: outPath, audio_duration_ms: durationMs };
}
|
|
41
|
+
|
|
42
|
+
/**
 * Compute a segment's visual duration from its audio duration: audio length
 * plus a safety buffer, rounded up to the nearest 0.5 s.
 *
 * A negative or non-finite audio duration is treated as 0, so the result is
 * never NaN and never shorter than the rounded-up buffer.
 *
 * @param {number} audioDurationMs - Audio length in milliseconds.
 * @param {number} [bufferSec=0.5] - Extra time to add after the audio, in seconds.
 * @returns {number} Duration in seconds, a multiple of 0.5.
 */
function planDurationSec(audioDurationMs, bufferSec = 0.5) {
  const audioMs = Number(audioDurationMs);
  const audioSec = Number.isFinite(audioMs) && audioMs > 0 ? audioMs / 1000 : 0;
  const raw = audioSec + bufferSec;
  return Math.ceil(raw * 2) / 2; // round up to nearest 0.5s
}
|
|
47
|
+
|
|
48
|
+
/**
 * Plan video segments: synthesize narration for each segment, derive its visual
 * duration from the audio length, and return the enriched segment list as JSON.
 *
 * Each returned segment keeps its input fields and gains `presentation`
 * (`duration`, or `per_card_duration` for carousels), plus `audio_path`,
 * `subtitle_text` and `audio_duration_ms` when narration text was given.
 * Caller-supplied `presentation` fields override the computed ones.
 *
 * A TTS failure does not abort the run: the segment is planned from a 3 s
 * estimate with no `audio_path`, and the failure is reported under `warnings`.
 *
 * @param {object} args
 * @param {Array<object>} args.segments - Segments to plan (must be non-empty).
 * @param {string} [args.workspace_id] - Target workspace; falls back to `currentWorkspaceId`.
 * @param {string} [args.voice_id] - TTS voice to use.
 * @param {string} [args.currentWorkspaceId] - Workspace of the current context.
 * @param {Function} args.api - Backend request function passed through to TTS.
 * @returns {Promise<object>} Tool result whose text is the JSON plan, or an
 *   error result when the arguments are invalid.
 */
export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_id, currentWorkspaceId, api }) {
  if (!Array.isArray(segments) || segments.length === 0) {
    return toolError('segments must be a non-empty array.');
  }

  const targetWorkspaceId = String(workspace_id ?? currentWorkspaceId ?? '').trim();
  if (!targetWorkspaceId) {
    return toolError('workspace_id is required (no current workspace context).');
  }

  // Always at least one card, so an empty visual_paths array can neither divide
  // by zero nor zero out the segment's share of the total duration.
  const countCards = (s) => Math.max(1, Array.isArray(s.visual_paths) ? s.visual_paths.length : 1);

  const planned = [];
  const errors = [];

  // TTS requests are issued one at a time, in segment order.
  for (let i = 0; i < segments.length; i++) {
    const seg = segments[i] ?? {};
    const text = String(seg.text ?? '').trim();
    const kind = String(seg.visual_kind ?? 'image');

    let audioResult = null;
    if (text) {
      try {
        audioResult = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
      } catch (err) {
        errors.push(`segments[${i}]: TTS failed — ${err.message}`);
        audioResult = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
      }
    }

    const audioDurationMs = audioResult?.audio_duration_ms ?? 0;
    let presentation;

    if (kind === 'carousel') {
      if (Array.isArray(seg.visual_paths) && seg.visual_paths.length === 0) {
        errors.push(`segments[${i}]: carousel has no visual_paths; planned as a single card`);
      }
      const numCards = countCards(seg);
      const totalDuration = audioDurationMs > 0 ? planDurationSec(audioDurationMs) : numCards * 4;
      const perCard = Math.max(2, Math.ceil((totalDuration / numCards) * 2) / 2);
      presentation = { per_card_duration: perCard };
    } else {
      // image, video, gif. Scrolling is requested through presentation.style;
      // a bare 'scroll' kind is still honored for older callers. Scrolling
      // segments get a longer 1.0 s buffer.
      const isScroll = kind === 'scroll' || seg.presentation?.style === 'scroll';
      const duration = audioDurationMs > 0 ? planDurationSec(audioDurationMs, isScroll ? 1.0 : 0.5) : 4;
      presentation = { duration, ...(isScroll ? { style: 'scroll' } : {}) };
    }

    const plannedSeg = {
      ...seg,
      ...(audioResult?.audio_path ? { audio_path: audioResult.audio_path } : {}),
      ...(text ? { subtitle_text: text } : {}),
      // Explicit caller hints win over the computed values.
      presentation: { ...presentation, ...(seg.presentation ?? {}) },
    };
    if (audioResult?.audio_duration_ms) {
      plannedSeg.audio_duration_ms = audioResult.audio_duration_ms;
    }
    planned.push(plannedSeg);
  }

  const result = {
    segments: planned,
    total_segments: planned.length,
    total_duration_ms: planned.reduce((sum, s) => {
      const d = s.presentation?.per_card_duration
        ? s.presentation.per_card_duration * countCards(s)
        : (s.presentation?.duration ?? 4);
      return sum + Math.round(d * 1000);
    }, 0),
    ...(errors.length > 0 ? { warnings: errors } : {}),
  };

  return toolText(JSON.stringify(result, null, 2));
}
|
|
@@ -1,440 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'node:child_process';
|
|
2
|
-
import path from 'node:path';
|
|
3
|
-
import { access, mkdir, mkdtemp, rm, stat, writeFile } from 'node:fs/promises';
|
|
4
|
-
import { constants as fsConstants } from 'node:fs';
|
|
5
|
-
import os from 'node:os';
|
|
6
|
-
|
|
7
|
-
const SUBTITLE_FONT = 'WenQuanYi Micro Hei';
|
|
8
|
-
const SUBTITLE_FONT_SIZE = 72;
|
|
9
|
-
const SUBTITLE_MARGIN_V = 80;
|
|
10
|
-
|
|
11
|
-
const MAX_STDERR_LENGTH = 4000;
|
|
12
|
-
|
|
13
|
-
const TRANSCODE_TARGETS = Object.freeze({
|
|
14
|
-
short_video_cn: {
|
|
15
|
-
width: 1080,
|
|
16
|
-
height: 1920,
|
|
17
|
-
fps: 30,
|
|
18
|
-
videoCodec: 'libx264',
|
|
19
|
-
profile: 'baseline',
|
|
20
|
-
pixelFormat: 'yuv420p',
|
|
21
|
-
crf: 23,
|
|
22
|
-
preset: 'veryfast',
|
|
23
|
-
level: '4.0',
|
|
24
|
-
audioCodec: 'aac',
|
|
25
|
-
audioBitrate: '128k',
|
|
26
|
-
audioSampleRate: 48000,
|
|
27
|
-
audioChannels: 2,
|
|
28
|
-
},
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
function normalizeText(value) {
|
|
32
|
-
if (typeof value !== 'string') return '';
|
|
33
|
-
return value.trim();
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
function normalizePath(value, label) {
|
|
37
|
-
const raw = normalizeText(value);
|
|
38
|
-
if (!raw) throw new Error(`${label} required`);
|
|
39
|
-
return path.resolve(raw);
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
async function ensureReadableFile(filePath, label) {
|
|
43
|
-
try {
|
|
44
|
-
await access(filePath, fsConstants.R_OK);
|
|
45
|
-
} catch {
|
|
46
|
-
throw new Error(`${label} not found or unreadable: ${filePath}`);
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
const st = await stat(filePath);
|
|
50
|
-
if (!st.isFile()) throw new Error(`${label} is not a file: ${filePath}`);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
async function ensureParentDir(filePath) {
|
|
54
|
-
await mkdir(path.dirname(filePath), { recursive: true });
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
function sanitizeStderr(stderr) {
|
|
58
|
-
const text = String(stderr ?? '').trim();
|
|
59
|
-
if (!text) return '';
|
|
60
|
-
if (text.length <= MAX_STDERR_LENGTH) return text;
|
|
61
|
-
return text.slice(text.length - MAX_STDERR_LENGTH);
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function toolError(prefix, error, stderr = '') {
|
|
65
|
-
const details = [];
|
|
66
|
-
const message = normalizeText(error?.message);
|
|
67
|
-
if (message) details.push(message);
|
|
68
|
-
const cleanedStderr = sanitizeStderr(stderr);
|
|
69
|
-
if (cleanedStderr) details.push(cleanedStderr);
|
|
70
|
-
const suffix = details.length > 0 ? `: ${details.join(' | ')}` : '';
|
|
71
|
-
return new Error(`${prefix}${suffix}`);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
function runProcess(binary, args, { name = binary } = {}) {
|
|
75
|
-
return new Promise((resolve, reject) => {
|
|
76
|
-
const child = spawn(binary, args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
77
|
-
let stdout = '';
|
|
78
|
-
let stderr = '';
|
|
79
|
-
|
|
80
|
-
child.stdout.on('data', (chunk) => {
|
|
81
|
-
stdout += chunk.toString();
|
|
82
|
-
});
|
|
83
|
-
child.stderr.on('data', (chunk) => {
|
|
84
|
-
stderr += chunk.toString();
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
child.on('error', (error) => {
|
|
88
|
-
reject(toolError(`${name} failed`, error, stderr));
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
child.on('close', (code) => {
|
|
92
|
-
if (code === 0) {
|
|
93
|
-
resolve({ stdout, stderr });
|
|
94
|
-
return;
|
|
95
|
-
}
|
|
96
|
-
reject(toolError(`${name} exited with code ${code}`, null, stderr));
|
|
97
|
-
});
|
|
98
|
-
});
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
function normalizeStartMs(value) {
|
|
102
|
-
const parsed = Number(value);
|
|
103
|
-
if (!Number.isFinite(parsed) || parsed < 0) return null;
|
|
104
|
-
return Math.floor(parsed);
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
function resolveStartMsFromEvents(segment, eventsLog = []) {
|
|
108
|
-
const phase = normalizeText(segment?.phase ?? segment?.phase_id ?? segment?.phaseId);
|
|
109
|
-
if (!phase) return null;
|
|
110
|
-
|
|
111
|
-
let candidate = null;
|
|
112
|
-
for (const event of eventsLog) {
|
|
113
|
-
const eventPhase = normalizeText(event?.phase ?? event?.phase_id ?? event?.phaseId);
|
|
114
|
-
if (!eventPhase || eventPhase !== phase) continue;
|
|
115
|
-
|
|
116
|
-
const eventStart = normalizeStartMs(
|
|
117
|
-
event?.t_ms_start
|
|
118
|
-
?? event?.tMsStart
|
|
119
|
-
?? event?.t_ms
|
|
120
|
-
?? event?.tMs
|
|
121
|
-
);
|
|
122
|
-
if (eventStart == null) continue;
|
|
123
|
-
if (candidate == null || eventStart < candidate) candidate = eventStart;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
return candidate;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
function normalizeAudioSegments(audioSegments = [], eventsLog = []) {
|
|
130
|
-
if (!Array.isArray(audioSegments)) {
|
|
131
|
-
throw new Error('audio_segments must be an array');
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
return audioSegments.map((segment, index) => {
|
|
135
|
-
if (!segment || typeof segment !== 'object' || Array.isArray(segment)) {
|
|
136
|
-
throw new Error(`audio_segments[${index}] must be an object`);
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
const audioPath = normalizePath(segment.audio_path ?? segment.audioPath, `audio_segments[${index}].audio_path`);
|
|
140
|
-
const startMs = normalizeStartMs(segment.start_ms ?? segment.startMs)
|
|
141
|
-
?? resolveStartMsFromEvents(segment, eventsLog);
|
|
142
|
-
if (startMs == null) {
|
|
143
|
-
throw new Error(`audio_segments[${index}].start_ms missing (and no matching events_log entry found)`);
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
return {
|
|
147
|
-
audioPath,
|
|
148
|
-
startMs,
|
|
149
|
-
};
|
|
150
|
-
});
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
function defaultOutputPath(inputPath, suffix) {
|
|
154
|
-
const ext = path.extname(inputPath) || '.mp4';
|
|
155
|
-
const base = path.basename(inputPath, ext);
|
|
156
|
-
return path.join(path.dirname(inputPath), `${base}.${suffix}${ext}`);
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
function escapeConcatPath(filePath) {
|
|
160
|
-
return filePath.replace(/'/g, `'\\''`);
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
/**
 * Mix a set of audio clips onto a video's first video stream with ffmpeg.
 *
 * Each clip is delayed by its `startMs`, all clips are mixed into a single
 * track, and the video stream is stream-copied. With no clips, the video is
 * copied with audio disabled (`-an`).
 *
 * @param {object} [options]
 * @param {string} options.video_path - Source video.
 * @param {Array} [options.audio_segments] - Clips, normalized by
 *   `normalizeAudioSegments` into `{ audioPath, startMs }` entries.
 * @param {Array} [options.events_log] - Passed through to
 *   `normalizeAudioSegments` alongside the segments.
 * @param {?string} [options.output] - Destination; defaults to
 *   `<video>.muxed.<ext>` next to the source.
 * @returns {Promise<string>} The output path.
 * @throws {Error} When the resolved output equals the video path.
 */
export async function muxAudioToVideo({
  video_path,
  audio_segments = [],
  events_log = [],
  output = null,
} = {}) {
  const videoPath = normalizePath(video_path, 'video_path');
  await ensureReadableFile(videoPath, 'video_path');

  // Process clips in timeline order.
  const segments = normalizeAudioSegments(audio_segments, events_log);
  segments.sort((left, right) => left.startMs - right.startMs);
  for (const { audioPath } of segments) {
    await ensureReadableFile(audioPath, `audio segment (${audioPath})`);
  }

  let outputPath;
  if (output) {
    outputPath = normalizePath(output, 'output');
  } else {
    outputPath = defaultOutputPath(videoPath, 'muxed');
  }
  if (outputPath === videoPath) throw new Error('output must not equal video_path');
  await ensureParentDir(outputPath);

  if (segments.length === 0) {
    const videoOnlyArgs = [
      '-y',
      '-i', videoPath,
      '-map', '0:v:0',
      '-c:v', 'copy',
      '-an',
      outputPath,
    ];
    await runProcess('ffmpeg', videoOnlyArgs, { name: 'ffmpeg mux(no-audio)' });
    return outputPath;
  }

  // Input 0 is the video, so clip N is ffmpeg input N + 1.
  const delayFilters = segments.map(
    ({ startMs }, idx) => `[${idx + 1}:a]adelay=${startMs}|${startMs},aresample=async=1:first_pts=0[a${idx}]`,
  );
  const mixLabels = segments.map((_, idx) => `[a${idx}]`).join('');
  const mixFilter = `${mixLabels}amix=inputs=${segments.length}:duration=longest:dropout_transition=0,apad[a]`;
  const filterGraph = [...delayFilters, mixFilter].join(';');

  const audioInputArgs = segments.flatMap(({ audioPath }) => ['-i', audioPath]);
  const ffmpegArgs = [
    '-y',
    '-i', videoPath,
    ...audioInputArgs,
    '-filter_complex', filterGraph,
    '-map', '0:v:0',
    '-map', '[a]',
    '-c:v', 'copy',
    '-c:a', 'aac',
    '-shortest',
    '-movflags', '+faststart',
    outputPath,
  ];

  await runProcess('ffmpeg', ffmpegArgs, { name: 'ffmpeg mux' });
  return outputPath;
}
|
|
228
|
-
|
|
229
|
-
/**
 * Concatenate videos with ffmpeg using stream copy (no re-encode).
 *
 * A single input is simply stream-copied to the output. Multiple inputs are
 * written to a temporary list file and joined through ffmpeg's `concat`
 * format; the temporary directory is removed afterwards.
 *
 * @param {object} [options]
 * @param {string[]} [options.inputs] - Videos to join, in order.
 * @param {string} [options.output] - Destination; defaults to
 *   `<first input>.concat.<ext>` next to the first input.
 * @returns {Promise<string>} The output path.
 * @throws {Error} When `inputs` is not a non-empty array, or when the
 *   resolved output is one of the inputs.
 */
export async function concatVideos({
  inputs = [],
  output,
} = {}) {
  if (!Array.isArray(inputs) || inputs.length === 0) {
    throw new Error('inputs must be a non-empty array');
  }

  const normalizedInputs = inputs.map((input, index) => normalizePath(input, `inputs[${index}]`));
  for (const inputPath of normalizedInputs) {
    await ensureReadableFile(inputPath, `concat input (${inputPath})`);
  }

  const outputPath = output
    ? normalizePath(output, 'output')
    : defaultOutputPath(normalizedInputs[0], 'concat');
  // ffmpeg runs with `-y`, so writing onto a source would clobber it while
  // it is still being read. Reject that up front, as the other helpers do.
  if (normalizedInputs.includes(outputPath)) {
    throw new Error('output must not equal any input');
  }
  await ensureParentDir(outputPath);

  if (normalizedInputs.length === 1) {
    await runProcess('ffmpeg', [
      '-y',
      '-i', normalizedInputs[0],
      '-c', 'copy',
      outputPath,
    ], { name: 'ffmpeg concat(single-input)' });
    return outputPath;
  }

  const tempDir = await mkdtemp(path.join(os.tmpdir(), 'lightcone-concat-'));
  const listPath = path.join(tempDir, 'inputs.txt');

  try {
    // One `file '<path>'` line per input, with single quotes escaped.
    const content = normalizedInputs
      .map((inputPath) => `file '${escapeConcatPath(inputPath)}'`)
      .join('\n');
    await writeFile(listPath, `${content}\n`, 'utf8');

    await runProcess('ffmpeg', [
      '-y',
      '-f', 'concat',
      '-safe', '0',
      '-i', listPath,
      '-c', 'copy',
      '-movflags', '+faststart',
      outputPath,
    ], { name: 'ffmpeg concat' });
  } finally {
    // Best-effort cleanup: a failed removal must not mask the ffmpeg result.
    await rm(tempDir, { recursive: true, force: true }).catch(() => {});
  }

  return outputPath;
}
|
|
281
|
-
|
|
282
|
-
/**
 * Map a transcode target name to its preset in `TRANSCODE_TARGETS`.
 *
 * An empty target, `short_video_cn`, `douyin` and `xhs` (case-insensitive)
 * all resolve to the `short_video_cn` preset.
 *
 * @param {*} target - Requested target name.
 * @returns {object} The matching preset.
 * @throws {Error} When the target is not recognized.
 */
function resolveTranscodeTarget(target) {
  const key = normalizeText(target).toLowerCase();
  const shortVideoAliases = ['short_video_cn', 'douyin', 'xhs'];

  if (key === '' || shortVideoAliases.includes(key)) {
    return TRANSCODE_TARGETS.short_video_cn;
  }

  throw new Error(`unsupported transcode target: ${target}`);
}
|
|
292
|
-
|
|
293
|
-
/**
 * Format a millisecond offset as an ASS timestamp `H:MM:SS.cc`
 * (centisecond precision). Negative inputs are clamped to zero.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string} Timestamp such as `0:01:02.50`.
 */
function msToAssTimestamp(ms) {
  const twoDigits = (value) => String(value).padStart(2, '0');

  // Round to centiseconds first so carries propagate into seconds/minutes.
  const centiseconds = Math.round(Math.max(0, ms) / 10);
  const wholeSeconds = Math.floor(centiseconds / 100);
  const wholeMinutes = Math.floor(wholeSeconds / 60);
  const hours = Math.floor(wholeMinutes / 60);

  const clock = [hours, twoDigits(wholeMinutes % 60), twoDigits(wholeSeconds % 60)].join(':');
  return `${clock}.${twoDigits(centiseconds % 100)}`;
}
|
|
303
|
-
|
|
304
|
-
/**
 * Hard-wrap CJK subtitle text so it never overflows the video frame.
 *
 * libass WrapStyle:0 doesn't handle Chinese text reliably (no word
 * boundaries), so explicit ASS `\N` line breaks are inserted every
 * `maxChars` characters. Characters are counted by code point, so
 * surrogate pairs are never split.
 *
 * @param {*} text - Subtitle text; `null`/`undefined` become an empty string.
 * @param {number} [maxChars=14] - Maximum characters per line. Clamped to a
 *   whole number >= 1; NaN falls back to 14.
 * @returns {string} The text with `\N` between chunks.
 */
function wrapSubtitleText(text, maxChars = 14) {
  const chars = Array.from(String(text ?? ''));

  // A width <= 0 would never advance the loop below (infinite loop), NaN
  // would silently drop the text, and a fraction yields uneven chunks.
  const requested = Number(maxChars);
  const limit = Number.isNaN(requested) ? 14 : Math.max(1, Math.floor(requested));

  if (chars.length <= limit) return chars.join('');
  const lines = [];
  for (let i = 0; i < chars.length; i += limit) {
    lines.push(chars.slice(i, i + limit).join(''));
  }
  return lines.join('\\N');
}
|
|
316
|
-
|
|
317
|
-
/**
 * Render subtitle cues as the text of an ASS (Advanced SubStation Alpha) file.
 *
 * The output has a `[Script Info]` section carrying the play resolution, a
 * single `Default` style built from the `SUBTITLE_*` constants, and one
 * `Dialogue` line per cue. Cue text is hard-wrapped to the frame width.
 *
 * @param {Array<{text: *, start_ms: number, end_ms: number}>} [subtitles]
 *   Cues with start/end offsets in milliseconds.
 * @param {object} [options]
 * @param {number} [options.playResX=1080] - Script width in pixels.
 * @param {number} [options.playResY=1920] - Script height in pixels.
 * @returns {string} Complete ASS document, newline-terminated.
 */
export function buildAssContent(subtitles = [], { playResX = 1080, playResY = 1920 } = {}) {
  // Max chars per line: (playResX - marginL - marginR) / fontSizePx
  // 1080 - 30 - 30 = 1020px, fontsize 72 ≈ 72px/char → 14 chars
  const fitted = Math.floor((playResX - 60) / SUBTITLE_FONT_SIZE);
  // Never pass a width below 1: a narrow frame would otherwise yield 0 or a
  // negative value, which makes the wrapping loop spin forever.
  const maxCharsPerLine = fitted >= 1 ? fitted : 1;

  const header = [
    '[Script Info]',
    'ScriptType: v4.00+',
    `PlayResX: ${playResX}`,
    `PlayResY: ${playResY}`,
    'WrapStyle: 2',
    '',
    '[V4+ Styles]',
    'Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
    `Style: Default,${SUBTITLE_FONT},${SUBTITLE_FONT_SIZE},&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,4,0,2,30,30,${SUBTITLE_MARGIN_V},1`,
    '',
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
  ].join('\n');

  const events = subtitles.map(({ text, start_ms, end_ms }) => {
    const wrapped = wrapSubtitleText(text, maxCharsPerLine);
    // Text is the final Dialogue field, so literal commas are allowed there.
    // Rewriting them as `{\,}` would turn each comma into an override block,
    // which renderers hide. Only raw newlines need converting to `\N`.
    const safeText = wrapped.replace(/\r?\n/g, '\\N');
    return `Dialogue: 0,${msToAssTimestamp(start_ms)},${msToAssTimestamp(end_ms)},Default,,0,0,0,,${safeText}`;
  });

  return `${header}\n${events.join('\n')}\n`;
}
|
|
345
|
-
|
|
346
|
-
/**
 * Re-encode a video to a platform preset with ffmpeg, optionally burning in
 * an ASS subtitle file.
 *
 * The picture is scaled to fit the preset frame, padded with black to the
 * exact size, and given a square sample aspect ratio. Codec, rate and audio
 * settings all come from the resolved preset.
 *
 * @param {object} [options]
 * @param {string} options.input - Source video.
 * @param {string} [options.output] - Destination; defaults to
 *   `<input>.platform.<ext>` next to the source.
 * @param {string} [options.target='short_video_cn'] - Preset name.
 * @param {?string} [options.subtitlesAssPath] - ASS file to render onto the
 *   video; skipped when falsy.
 * @returns {Promise<string>} The output path.
 * @throws {Error} When the resolved output equals the input, or the target
 *   is unsupported.
 */
export async function transcodeForPlatform({
  input,
  output,
  target = 'short_video_cn',
  subtitlesAssPath = null,
} = {}) {
  const inputPath = normalizePath(input, 'input');
  await ensureReadableFile(inputPath, 'input');

  let outputPath;
  if (output) {
    outputPath = normalizePath(output, 'output');
  } else {
    outputPath = defaultOutputPath(inputPath, 'platform');
  }
  if (outputPath === inputPath) throw new Error('output must not equal input');
  await ensureParentDir(outputPath);

  const preset = resolveTranscodeTarget(target);
  const frame = `${preset.width}:${preset.height}`;
  const filters = [
    `scale=${frame}:force_original_aspect_ratio=decrease`,
    `pad=${frame}:(ow-iw)/2:(oh-ih)/2:black`,
    'setsar=1',
  ];
  if (subtitlesAssPath) {
    // Forward slashes, then escape `:` and `'` for the filter argument.
    const forwardSlashed = subtitlesAssPath.split('\\').join('/');
    const colonEscaped = forwardSlashed.split(':').join('\\:');
    const quoteEscaped = colonEscaped.split("'").join("\\'");
    filters.push(`subtitles='${quoteEscaped}'`);
  }

  const ffmpegArgs = [
    '-y',
    '-i', inputPath,
    '-vf', filters.join(','),
    '-r', String(preset.fps),
    '-c:v', preset.videoCodec,
    '-profile:v', preset.profile,
    '-level', preset.level,
    '-pix_fmt', preset.pixelFormat,
    '-preset', preset.preset,
    '-crf', String(preset.crf),
    '-c:a', preset.audioCodec,
    '-b:a', preset.audioBitrate,
    '-ar', String(preset.audioSampleRate),
    '-ac', String(preset.audioChannels),
    '-movflags', '+faststart',
    outputPath,
  ];
  await runProcess('ffmpeg', ffmpegArgs, { name: 'ffmpeg transcode' });

  return outputPath;
}
|
|
394
|
-
|
|
395
|
-
/**
 * Read a media file's container duration via ffprobe.
 *
 * @param {string} inputPath - Media file to inspect.
 * @returns {Promise<number>} Duration in whole milliseconds (rounded down).
 * @throws {Error} When ffprobe's output is not a positive finite number.
 */
export async function probeDurationMs(inputPath) {
  const mediaPath = normalizePath(inputPath, 'input');
  await ensureReadableFile(mediaPath, 'input');

  // Ask ffprobe for the bare duration value only (no key, no wrappers).
  const probeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'default=noprint_wrappers=1:nokey=1',
    mediaPath,
  ];
  const { stdout } = await runProcess('ffprobe', probeArgs, { name: 'ffprobe duration' });

  const durationSec = Number.parseFloat(String(stdout ?? '').trim());
  const isUsable = Number.isFinite(durationSec) && durationSec > 0;
  if (!isUsable) {
    throw new Error(`ffprobe returned invalid duration for ${mediaPath}`);
  }
  return Math.floor(durationSec * 1000);
}
|
|
412
|
-
|
|
413
|
-
/**
 * Read basic properties of a file's first video stream via ffprobe.
 *
 * @param {string} inputPath - Media file to inspect.
 * @returns {Promise<{width: ?number, height: ?number, frame_rate: string,
 *   pixel_format: string, video_codec: string}>} Stream properties; numeric
 *   fields are `null` and string fields are empty when ffprobe reports
 *   nothing for them.
 * @throws {Error} When ffprobe's output is not valid JSON.
 */
export async function readMediaSpec(inputPath) {
  const mediaPath = normalizePath(inputPath, 'input');
  await ensureReadableFile(mediaPath, 'input');

  const probeArgs = [
    '-v', 'error',
    '-select_streams', 'v:0',
    '-show_entries', 'stream=width,height,r_frame_rate,pix_fmt,codec_name',
    '-of', 'json',
    mediaPath,
  ];
  const { stdout } = await runProcess('ffprobe', probeArgs, { name: 'ffprobe spec' });

  let report;
  try {
    report = JSON.parse(stdout);
  } catch {
    throw new Error(`Failed to parse ffprobe spec output for ${mediaPath}`);
  }

  // Fall back to an empty object when no stream entry was reported.
  const videoStream = report?.streams?.[0] ?? {};
  const width = Number(videoStream.width) || null;
  const height = Number(videoStream.height) || null;
  return {
    width,
    height,
    frame_rate: String(videoStream.r_frame_rate ?? ''),
    pixel_format: String(videoStream.pix_fmt ?? ''),
    video_codec: String(videoStream.codec_name ?? ''),
  };
}
|