mulmocast 2.0.8 → 2.1.0
This diff covers the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/lib/actions/movie.d.ts +29 -2
- package/lib/actions/movie.js +243 -91
- package/lib/methods/mulmo_presentation_style.d.ts +4 -3
- package/lib/methods/mulmo_presentation_style.js +7 -1
- package/lib/types/schema.d.ts +202 -31
- package/lib/types/schema.js +27 -13
- package/lib/types/schema_video_filter.d.ts +423 -0
- package/lib/types/schema_video_filter.js +253 -0
- package/lib/types/type.d.ts +2 -1
- package/lib/utils/context.d.ts +11 -3
- package/lib/utils/ffmpeg_utils.js +2 -2
- package/lib/utils/provider2agent.d.ts +4 -0
- package/lib/utils/provider2agent.js +5 -0
- package/lib/utils/utils.js +24 -55
- package/lib/utils/video_filter.d.ts +7 -0
- package/lib/utils/video_filter.js +149 -0
- package/package.json +7 -7
- package/scripts/test/README.md +48 -48
- package/scripts/test/test_transition2.json +460 -0
- package/scripts/test/test_transition2.json~ +62 -0
- package/scripts/test/test_transition3.json +70 -0
- package/scripts/test/test_transition3.json~ +76 -0
- package/scripts/test/test_transition_no_audio.json +16 -0
- package/scripts/test/test_video_filters.json~ +227 -0
- package/scripts/test/test_wipe_simple.json +37 -0
- package/scripts/test/test_all_image.json~ +0 -45
- package/scripts/test/test_all_movie.json~ +0 -37
- package/scripts/test/test_all_tts.json~ +0 -83
- package/scripts/test/test_audio_gemini.json~ +0 -67
- package/scripts/test/test_genai2.json~ +0 -84
- package/scripts/test/test_genai_movie.json~ +0 -22
- package/scripts/test/test_kotodama.json~ +0 -0
- package/scripts/test/test_lipsync2.json~ +0 -24
- package/scripts/test/test_movie2.json~ +0 -40
- package/scripts/test/test_play_to_end.json~ +0 -65
package/lib/actions/movie.d.ts
CHANGED
@@ -1,5 +1,7 @@
-import { MulmoStudioContext, …
-…
+import { MulmoStudioContext, MulmoBeat, MulmoTransition, MulmoCanvasDimension, MulmoFillOption } from "../types/index.js";
+import { FfmpegContext } from "../utils/ffmpeg_utils.js";
+type VideoId = string | undefined;
+export declare const getVideoPart: (inputIndex: number, isMovie: boolean, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number) => {
     videoId: string;
     videoPart: string;
 };
@@ -7,5 +9,30 @@ export declare const getAudioPart: (inputIndex: number, duration: number, delay:
     audioId: string;
     audioPart: string;
 };
+export declare const getOutOverlayCoords: (transitionType: string, d: number, t: number) => string;
+export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
+export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
+export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
+export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
+export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
+    style: "aspectFit" | "aspectFill";
+};
+export declare const getTransitionVideoId: (transition: MulmoTransition, videoIdsForBeats: VideoId[], index: number) => {
+    videoId: string;
+    nextVideoId: undefined;
+    beatIndex: number;
+} | {
+    videoId: string;
+    nextVideoId: string;
+    beatIndex: number;
+};
+export declare const getConcatVideoFilter: (concatVideoId: string, videoIdsForBeats: VideoId[]) => string;
+export declare const validateBeatSource: (studioBeat: MulmoStudioContext["studio"]["beats"][number], index: number) => string;
+export declare const addSplitAndExtractFrames: (ffmpegContext: FfmpegContext, videoId: string, duration: number, isMovie: boolean, needFirst: boolean, needLast: boolean, canvasInfo: {
+    width: number;
+    height: number;
+}) => void;
+export declare const createVideo: (audioArtifactFilePath: string, outputVideoPath: string, context: MulmoStudioContext, isTest?: boolean) => Promise<boolean | string[]>;
 export declare const movieFilePath: (context: MulmoStudioContext) => string;
 export declare const movie: (context: MulmoStudioContext) => Promise<boolean>;
+export {};
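A minimal usage sketch of the two new overlay-coordinate helpers declared above (the import path and the numeric values are illustrative assumptions, not taken from the package docs):

```typescript
// Hypothetical import path; the helpers are exported from lib/actions/movie.js.
import { getOutOverlayCoords, getInOverlayCoords } from "mulmocast/lib/actions/movie.js";

const d = 0.5; // assumed transition duration in seconds
const t = 12.3; // assumed transition start time in the output video

// Returns an ffmpeg overlay position expression, e.g. "x='-(t-12.3)*W/0.5':y=0".
console.log(getOutOverlayCoords("slideout_left", d, t));
// Returns "x=0:y='H-(t-12.3)*H/0.5'": the incoming frame rises from the bottom edge.
console.log(getInOverlayCoords("slidein_up", d, t));

// Unrecognized types throw, so callers should validate the transition type first.
try {
  getOutOverlayCoords("spin", d, t); // "spin" is not a supported type
} catch (e) {
  console.error((e as Error).message); // "Unknown transition type: spin"
}
```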
package/lib/actions/movie.js
CHANGED
@@ -1,5 +1,5 @@
 import { GraphAILogger, assert } from "graphai";
-import { …
+import { mulmoFillOptionSchema } from "../types/index.js";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage, isFile } from "../utils/file.js";
 import { createVideoFileError, createVideoSourceError } from "../utils/error_cause.js";
@@ -7,27 +7,27 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAud
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
-export const getVideoPart = (inputIndex, …
+export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed) => {
     const videoId = `v${inputIndex}`;
     const videoFilters = [];
     // Handle different media types
     const originalDuration = duration * speed;
-    if (…
-        videoFilters.push("loop=loop=-1:size=1:start=0");
-    }
-    else if (mediaType === "movie") {
+    if (isMovie) {
         // For videos, extend with last frame if shorter than required duration
         // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
         videoFilters.push(`tpad=stop_mode=clone:stop_duration=${originalDuration * 2}`); // Use 2x duration to ensure coverage
     }
+    else {
+        videoFilters.push("loop=loop=-1:size=1:start=0");
+    }
     // Common filters for all media types
     videoFilters.push(`trim=duration=${originalDuration}`, "fps=30");
     // Apply speed if specified
-    if (speed …
-        videoFilters.push(…
+    if (speed === 1.0) {
+        videoFilters.push("setpts=PTS-STARTPTS");
     }
     else {
-        videoFilters.push(…
+        videoFilters.push(`setpts=${1 / speed}*PTS`);
     }
     // Apply scaling based on fill option
     if (fillOption.style === "aspectFill") {
@@ -43,7 +43,7 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOp
     videoFilters.push("setsar=1", "format=yuv420p");
     return {
         videoId,
-        videoPart: `[${inputIndex}:v]` + videoFilters.…
+        videoPart: `[${inputIndex}:v]` + videoFilters.join(",") + `[${videoId}]`,
     };
 };
 export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
@@ -82,47 +82,138 @@ const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
     const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
     if (caption && beatsWithCaptions.length > 0) {
         const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
-        return beatsWithCaptions.reduce((…
+        return beatsWithCaptions.reduce((prevVideoId, beat, index) => {
             const { startAt, duration, captionFile } = beat;
             if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
                 const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
                 const compositeVideoId = `oc${index}`;
-                ffmpegContext.filterComplex.push(`[${…
+                ffmpegContext.filterComplex.push(`[${prevVideoId}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
                 return compositeVideoId;
             }
-            return …
+            return prevVideoId;
         }, concatVideoId);
     }
     return concatVideoId;
 };
-const …
-if (…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
+export const getOutOverlayCoords = (transitionType, d, t) => {
+    if (transitionType === "slideout_left") {
+        return `x='-(t-${t})*W/${d}':y=0`;
+    }
+    else if (transitionType === "slideout_right") {
+        return `x='(t-${t})*W/${d}':y=0`;
+    }
+    else if (transitionType === "slideout_up") {
+        return `x=0:y='-(t-${t})*H/${d}'`;
+    }
+    else if (transitionType === "slideout_down") {
+        return `x=0:y='(t-${t})*H/${d}'`;
+    }
+    throw new Error(`Unknown transition type: ${transitionType}`);
+};
+export const getInOverlayCoords = (transitionType, d, t) => {
+    if (transitionType === "slidein_left") {
+        return `x='-W+(t-${t})*W/${d}':y=0`;
+    }
+    else if (transitionType === "slidein_right") {
+        return `x='W-(t-${t})*W/${d}':y=0`;
+    }
+    else if (transitionType === "slidein_up") {
+        return `x=0:y='H-(t-${t})*H/${d}'`;
+    }
+    else if (transitionType === "slidein_down") {
+        return `x=0:y='-H+(t-${t})*H/${d}'`;
+    }
+    throw new Error(`Unknown transition type: ${transitionType}`);
+};
+const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats) => {
+    if (transitionVideoIds.length === 0) {
+        return captionedVideoId;
+    }
+    return transitionVideoIds.reduce((prevVideoId, { videoId: transitionVideoId, nextVideoId, beatIndex }) => {
+        const beat = context.studio.script.beats[beatIndex];
+        const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+        if (!transition) {
+            return prevVideoId; // Skip if no transition is defined
+        }
+        // Transition happens at the start of this beat
+        const startAt = beatTimestamps[beatIndex] - 0.05; // 0.05 is to avoid flickering
+        const duration = transition.duration;
+        const outputVideoId = `trans_${beatIndex}_o`;
+        const processedVideoId = `${transitionVideoId}_f`;
+        if (transition.type === "fade") {
+            // Fade out the previous beat's last frame
+            ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${duration}:alpha=1,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+            ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+        }
+        else if (transition.type.startsWith("slideout_")) {
+            // Slideout: previous beat's last frame slides out
+            ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+            ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=${getOutOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+        }
+        else if (transition.type.startsWith("slidein_")) {
+            // Slidein: this beat's first frame slides in over the previous beat's last frame
+            if (!nextVideoId) {
+                // Cannot apply slidein without first frame
+                return prevVideoId;
             }
-…
-…
+            // Get previous beat's last frame for background
+            const prevVideoSourceId = videoIdsForBeats[beatIndex - 1];
+            // Both movie and image beats now have _last
+            const prevLastFrame = `${prevVideoSourceId}_last`;
+            // Prepare background (last frame of previous beat)
+            const backgroundVideoId = `${prevLastFrame}_bg`;
+            ffmpegContext.filterComplex.push(`[${prevLastFrame}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${backgroundVideoId}]`);
+            // Prepare sliding frame (first frame of this beat)
+            const slideinFrameId = `${nextVideoId}_f`;
+            ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${slideinFrameId}]`);
+            // First overlay: put background on top of concat video
+            const bgOutputId = `${prevLastFrame}_bg_o`;
+            ffmpegContext.filterComplex.push(`[${prevVideoId}][${backgroundVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${bgOutputId}]`);
+            // Second overlay: slide in the new frame on top of background
+            ffmpegContext.filterComplex.push(`[${bgOutputId}][${slideinFrameId}]overlay=${getInOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+        }
+        else if (transition.type.startsWith("wipe")) {
+            // Wipe transition: use xfade filter between previous beat's last frame and this beat's first frame
+            if (!nextVideoId) {
+                // Cannot apply wipe without first frame
+                return prevVideoId;
             }
-…
-…
-…
-…
+            // Use xfade offset instead of trimming to avoid framerate issues
+            // The static frames are created with proper duration, use offset to start transition at the right time
+            const prevBeatDuration = context.studio.beats[beatIndex - 1].duration ?? 0;
+            const xfadeOffset = prevBeatDuration - duration;
+            // Apply xfade with explicit pixel format
+            const xfadeOutputId = `${transitionVideoId}_xfade`;
+            ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuv420p[${transitionVideoId}_fmt]`);
+            ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuv420p[${nextVideoId}_fmt]`);
+            ffmpegContext.filterComplex.push(`[${transitionVideoId}_fmt][${nextVideoId}_fmt]xfade=transition=${transition.type}:duration=${duration}:offset=${xfadeOffset}[${xfadeOutputId}]`);
+            // Set PTS for overlay timing
+            const xfadeTimedId = `${xfadeOutputId}_t`;
+            ffmpegContext.filterComplex.push(`[${xfadeOutputId}]setpts=PTS-STARTPTS+${startAt}/TB[${xfadeTimedId}]`);
+            // Overlay the xfade result on the concat video
+            ffmpegContext.filterComplex.push(`[${prevVideoId}][${xfadeTimedId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+        }
+        else {
+            throw new Error(`Unknown transition type: ${transition.type}`);
+        }
+        return outputVideoId;
+    }, captionedVideoId);
+};
+export const getNeedFirstFrame = (context) => {
+    return context.studio.script.beats.map((beat, index) => {
+        if (index === 0)
+            return false; // First beat cannot have transition
+        const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+        return (transition?.type.startsWith("slidein_") || transition?.type.startsWith("wipe")) ?? false;
+    });
+};
+export const getNeedLastFrame = (context) => {
+    return context.studio.script.beats.map((beat, index) => {
+        if (index === context.studio.script.beats.length - 1)
+            return false; // Last beat doesn't need _last
+        const nextTransition = MulmoPresentationStyleMethods.getMovieTransition(context, context.studio.script.beats[index + 1]);
+        return nextTransition !== null; // Any transition on next beat requires this beat's last frame
+    });
 };
 const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
     if (audioIdsFromMovieBeats.length > 0) {
@@ -135,17 +226,96 @@ const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMov
     }
     return artifactAudioId;
 };
-const …
-…
-…
-…
-…
+export const getExtraPadding = (context, index) => {
+    // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
+    if (index === 0) {
+        return MulmoStudioContextMethods.getIntroPadding(context);
+    }
+    else if (index === context.studio.beats.length - 1) {
+        return context.presentationStyle.audioParams.outroPadding;
+    }
+    return 0;
+};
+export const getFillOption = (context, beat) => {
+    // Get fillOption from merged imageParams (global + beat-specific)
+    const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+    const beatFillOption = beat.movieParams?.fillOption;
+    const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+    return { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+};
+export const getTransitionVideoId = (transition, videoIdsForBeats, index) => {
+    if (transition.type === "fade" || transition.type.startsWith("slideout_")) {
+        // Use previous beat's last frame. TODO: support voice-over
+        const prevVideoSourceId = videoIdsForBeats[index - 1];
+        // Both movie and image beats now have _last
+        const frameId = `${prevVideoSourceId}_last`;
+        return { videoId: frameId, nextVideoId: undefined, beatIndex: index };
+    }
+    if (transition.type.startsWith("wipe")) {
+        // Wipe needs both previous beat's last frame and this beat's first frame
+        const prevVideoSourceId = videoIdsForBeats[index - 1];
+        const prevLastFrame = `${prevVideoSourceId}_last`;
+        const nextFirstFrame = `${videoIdsForBeats[index]}_first`;
+        return { videoId: prevLastFrame, nextVideoId: nextFirstFrame, beatIndex: index };
+    }
+    // Use this beat's first frame. slidein_ case
+    return { videoId: "", nextVideoId: `${videoIdsForBeats[index]}_first`, beatIndex: index };
+};
+export const getConcatVideoFilter = (concatVideoId, videoIdsForBeats) => {
+    const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
+    const inputs = videoIds.map((id) => `[${id}]`).join("");
+    return `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
+};
+export const validateBeatSource = (studioBeat, index) => {
+    const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
+    assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
+    assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
+    assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
+    return sourceFile;
+};
+export const addSplitAndExtractFrames = (ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo) => {
+    const outputs = [`[${videoId}]`];
+    if (needFirst)
+        outputs.push(`[${videoId}_first_src]`);
+    if (needLast)
+        outputs.push(`[${videoId}_last_src]`);
+    ffmpegContext.filterComplex.push(`[${videoId}]split=${outputs.length}${outputs.join("")}`);
+    if (needFirst) {
+        // Create static frame using nullsrc as base for proper framerate/timebase
+        // Note: setpts must NOT be used here as it loses framerate metadata needed by xfade
+        ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_first_null]`);
+        ffmpegContext.filterComplex.push(`[${videoId}_first_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_first_frame]`);
+        ffmpegContext.filterComplex.push(`[${videoId}_first_null][${videoId}_first_frame]overlay=format=auto,fps=30[${videoId}_first]`);
+    }
+    if (needLast) {
+        if (isMovie) {
+            // Movie beats: extract actual last frame
+            ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+            ffmpegContext.filterComplex.push(`[${videoId}_last_src]reverse,select='eq(n,0)',reverse,scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+            ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+        }
+        else {
+            // Image beats: all frames are identical, so just select one
+            ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+            ffmpegContext.filterComplex.push(`[${videoId}_last_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+            ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
        }
+    }
+};
+const findMissingIndex = (context) => {
+    return context.studio.beats.findIndex((studioBeat, index) => {
         const beat = context.studio.script.beats[index];
         if (beat.image?.type === "voice_over") {
             return false; // Voice-over does not have either imageFile or movieFile.
         }
         return !studioBeat.imageFile && !studioBeat.movieFile;
     });
+};
+export const createVideo = async (audioArtifactFilePath, outputVideoPath, context, isTest = false) => {
+    const caption = MulmoStudioContextMethods.getCaption(context);
+    const start = performance.now();
+    const ffmpegContext = FfmpegContextInit();
+    const missingIndex = findMissingIndex(context);
     if (missingIndex !== -1) {
         GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
         return false;
@@ -156,6 +326,10 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
     const audioIdsFromMovieBeats = [];
     const transitionVideoIds = [];
     const beatTimestamps = [];
+    // Check which beats need _first (for slidein transition on this beat)
+    const needsFirstFrame = getNeedFirstFrame(context);
+    // Check which beats need _last (for any transition on next beat - they all need previous beat's last frame)
+    const needsLastFrame = getNeedLastFrame(context);
     context.studio.beats.reduce((timestamp, studioBeat, index) => {
         const beat = context.studio.script.beats[index];
         if (beat.image?.type === "voice_over") {
@@ -163,47 +337,28 @@
             beatTimestamps.push(timestamp);
             return timestamp; // Skip voice-over beats.
         }
-        const sourceFile = …
-        assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
-        assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
-        assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
-        const extraPadding = (() => {
-            // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
-            if (index === 0) {
-                return MulmoStudioContextMethods.getIntroPadding(context);
-            }
-            else if (index === context.studio.beats.length - 1) {
-                return context.presentationStyle.audioParams.outroPadding;
-            }
-            return 0;
-        })();
+        const sourceFile = isTest ? "/test/dummy.mp4" : validateBeatSource(studioBeat, index);
         // The movie duration is bigger in case of voice-over.
-        const duration = Math.max(studioBeat.duration + …
-        // Get fillOption from merged imageParams (global + beat-specific)
-        const globalFillOption = context.presentationStyle.movieParams?.fillOption;
-        const beatFillOption = beat.movieParams?.fillOption;
-        const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
-        const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+        const duration = Math.max(studioBeat.duration + getExtraPadding(context, index), studioBeat.movieDuration ?? 0);
         const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
-        const …
+        const isMovie = !!(studioBeat.lipSyncFile ||
+            studioBeat.movieFile ||
+            MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat) === "movie");
         const speed = beat.movieParams?.speed ?? 1.0;
-        const { videoId, videoPart } = getVideoPart(inputIndex, …
+        const { videoId, videoPart } = getVideoPart(inputIndex, isMovie, duration, canvasInfo, getFillOption(context, beat), speed);
         ffmpegContext.filterComplex.push(videoPart);
-…
-…
-…
-…
-…
-…
-            ffmpegContext.filterComplex.push(`[${videoId}_1]reverse,select='eq(n,0)',reverse,tpad=stop_mode=clone:stop_duration=${duration},fps=30,setpts=PTS-STARTPTS[${videoId}_2]`);
-            transitionVideoIds.push(`${videoId}_2`);
-        }
-        else {
-            transitionVideoIds.push(`${videoId}_1`);
-        }
+        // for transition
+        const needFirst = needsFirstFrame[index]; // This beat has slidein
+        const needLast = needsLastFrame[index]; // Next beat has transition
+        videoIdsForBeats.push(videoId);
+        if (needFirst || needLast) {
+            addSplitAndExtractFrames(ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo);
         }
-…
-…
+        // Record transition info if this beat has a transition
+        const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+        if (transition && index > 0) {
+            const transitionVideoId = getTransitionVideoId(transition, videoIdsForBeats, index);
+            transitionVideoIds.push(transitionVideoId);
         }
         // NOTE: We don't support audio if the speed is not 1.0.
         const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
@@ -218,23 +373,20 @@
     }, 0);
     assert(videoIdsForBeats.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
     assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
-    // console.log("*** images", images.audioIds);
     // Concatenate the trimmed images
     const concatVideoId = "concat_video";
-…
-    const inputs = videoIds.map((id) => `[${id}]`).join("");
-    const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
-    ffmpegContext.filterComplex.push(filter);
+    ffmpegContext.filterComplex.push(getConcatVideoFilter(concatVideoId, videoIdsForBeats));
     const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
-    const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
+    const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats);
+    if (isTest) {
+        return ffmpegContext.filterComplex;
+    }
     GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
     const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
-    const …
-    const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, artifactAudioId, audioIdsFromMovieBeats);
-    // GraphAILogger.debug("filterComplex", ffmpegContext.filterComplex);
+    const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
     await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
-    const …
-    GraphAILogger.info(`Video created successfully! ${Math.round(…
+    const endTime = performance.now();
+    GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
     GraphAILogger.info(context.studio.script.title);
     GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
     return true;
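For concreteness, here is a worked example of what the new fade branch of addTransitionEffects generates (the numbers and stream labels are assumptions, not from the package): a 0.5 s fade on beat 2, with beatTimestamps[2] at 10 s and beat 1's video stream labeled "v1".

```typescript
// Assumed inputs: a 0.5 s fade on beat 2, beatTimestamps[2] === 10,
// and beat 1's video stream labeled "v1".
const duration = 0.5;
const startAt = 10 - 0.05; // the code backs up 0.05 s to avoid flickering
const transitionVideoId = "v1_last"; // previous beat's extracted last frame
const processedVideoId = `${transitionVideoId}_f`;
const prevVideoId = "concat_video"; // the concat output when no captions are applied
const outputVideoId = "trans_2_o";

// The two filter_complex entries the fade branch pushes:
// [v1_last]format=yuva420p,fade=t=out:d=0.5:alpha=1,setpts=PTS-STARTPTS+9.95/TB[v1_last_f]
// [concat_video][v1_last_f]overlay=enable='between(t,9.95,10.45)'[trans_2_o]
console.log(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${duration}:alpha=1,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
console.log(`[${prevVideoId}][${processedVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
```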
package/lib/methods/mulmo_presentation_style.d.ts
CHANGED
@@ -3,13 +3,14 @@
  * (No Node.js built-ins like fs, path, dotenv, etc.)
  * Works in both Node.js and modern browsers.
  */
-import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
+import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext, MulmoTransition } from "../types/index.js";
 export declare const MulmoPresentationStyleMethods: {
     getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
     getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
     getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
     getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
     getSpeaker(context: MulmoStudioContext, beat: MulmoBeat, targetLang: string | undefined): SpeakerData;
+    getMovieTransition(context: MulmoStudioContext, beat: MulmoBeat): MulmoTransition | null;
     getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
     getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
     getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -20,11 +21,11 @@ export declare const MulmoPresentationStyleMethods: {
         fillOption?: {
             style: "aspectFit" | "aspectFill";
         } | undefined;
-        speed?: number | undefined;
         transition?: {
-            type: "fade" | "slideout_left";
+            type: "fade" | "slideout_left" | "slideout_right" | "slideout_up" | "slideout_down" | "slidein_left" | "slidein_right" | "slidein_up" | "slidein_down" | "wipeleft" | "wiperight" | "wipeup" | "wipedown" | "wipetl" | "wipetr" | "wipebl" | "wipebr";
             duration: number;
         } | undefined;
+        speed?: number | undefined;
     };
     keyName: string;
 };
package/lib/methods/mulmo_presentation_style.js
CHANGED
@@ -5,7 +5,7 @@
  */
 import { isNull } from "graphai";
 import { userAssert } from "../utils/utils.js";
-import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
+import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
 import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../utils/provider2agent.js";
 const defaultTextSlideStyles = [
     '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
@@ -63,6 +63,12 @@ export const MulmoPresentationStyleMethods = {
         }
         return speaker;
     },
+    getMovieTransition(context, beat) {
+        const transitionData = beat.movieParams?.transition ?? context.presentationStyle.movieParams?.transition;
+        if (!transitionData)
+            return null;
+        return mulmoTransitionSchema.parse(transitionData);
+    },
     /* NOTE: This method is not used.
     getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
       const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);