mulmocast 2.0.9 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
- import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
- export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number) => {
+ import { MulmoStudioContext, MulmoBeat, MulmoTransition, MulmoCanvasDimension, MulmoFillOption, MulmoVideoFilter } from "../types/index.js";
+ import { FfmpegContext } from "../utils/ffmpeg_utils.js";
+ type VideoId = string | undefined;
+ export declare const getVideoPart: (inputIndex: number, isMovie: boolean, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number, filters?: MulmoVideoFilter[]) => {
  videoId: string;
  videoPart: string;
  };
@@ -7,5 +9,30 @@ export declare const getAudioPart: (inputIndex: number, duration: number, delay:
  audioId: string;
  audioPart: string;
  };
+ export declare const getOutOverlayCoords: (transitionType: string, d: number, t: number) => string;
+ export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
+ export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
+ export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
+ export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
+ export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
+ style: "aspectFit" | "aspectFill";
+ };
+ export declare const getTransitionVideoId: (transition: MulmoTransition, videoIdsForBeats: VideoId[], index: number) => {
+ videoId: string;
+ nextVideoId: undefined;
+ beatIndex: number;
+ } | {
+ videoId: string;
+ nextVideoId: string;
+ beatIndex: number;
+ };
+ export declare const getConcatVideoFilter: (concatVideoId: string, videoIdsForBeats: VideoId[]) => string;
+ export declare const validateBeatSource: (studioBeat: MulmoStudioContext["studio"]["beats"][number], index: number) => string;
+ export declare const addSplitAndExtractFrames: (ffmpegContext: FfmpegContext, videoId: string, duration: number, isMovie: boolean, needFirst: boolean, needLast: boolean, canvasInfo: {
+ width: number;
+ height: number;
+ }) => void;
+ export declare const createVideo: (audioArtifactFilePath: string, outputVideoPath: string, context: MulmoStudioContext, isTest?: boolean) => Promise<boolean | string[]>;
  export declare const movieFilePath: (context: MulmoStudioContext) => string;
  export declare const movie: (context: MulmoStudioContext) => Promise<boolean>;
+ export {};
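
The slide transitions added in this release are driven by the two small helpers declared above, getOutOverlayCoords and getInOverlayCoords, whose implementations appear in the second file's diff below: they return the x/y expressions that get spliced into ffmpeg's overlay filter. A minimal sketch of what they produce, assuming a 1.5-second transition that starts 12 seconds into the video (the import path is a placeholder; this diff does not show the module's file name):

import { getOutOverlayCoords, getInOverlayCoords } from "mulmocast"; // import path assumed, not shown in this diff

// Previous beat's last frame slides out to the left over 1.5 s, starting at t = 12 s.
// Returns: x='-(t-12)*W/1.5':y=0
const outCoords = getOutOverlayCoords("slideout_left", 1.5, 12);

// New beat's first frame slides in from the right over the same window.
// Returns: x='W-(t-12)*W/1.5':y=0
const inCoords = getInOverlayCoords("slidein_right", 1.5, 12);

// addTransitionEffects splices these expressions into an overlay filter such as
// [prev][frame_f]overlay=x='-(t-12)*W/1.5':y=0:enable='between(t,12,13.5)'[trans_1_o]
console.log(outCoords, inCoords);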
@@ -1,33 +1,34 @@
  import { GraphAILogger, assert } from "graphai";
- import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
+ import { mulmoFillOptionSchema, } from "../types/index.js";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage, isFile } from "../utils/file.js";
  import { createVideoFileError, createVideoSourceError } from "../utils/error_cause.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput, } from "../utils/ffmpeg_utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
- export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption, speed) => {
+ export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters) => {
  const videoId = `v${inputIndex}`;
  const videoFilters = [];
  // Handle different media types
  const originalDuration = duration * speed;
- if (mediaType === "image") {
- videoFilters.push("loop=loop=-1:size=1:start=0");
- }
- else if (mediaType === "movie") {
+ if (isMovie) {
  // For videos, extend with last frame if shorter than required duration
  // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
  videoFilters.push(`tpad=stop_mode=clone:stop_duration=${originalDuration * 2}`); // Use 2x duration to ensure coverage
  }
+ else {
+ videoFilters.push("loop=loop=-1:size=1:start=0");
+ }
  // Common filters for all media types
  videoFilters.push(`trim=duration=${originalDuration}`, "fps=30");
  // Apply speed if specified
- if (speed !== 1.0) {
- videoFilters.push(`setpts=${1 / speed}*PTS`);
+ if (speed === 1.0) {
+ videoFilters.push("setpts=PTS-STARTPTS");
  }
  else {
- videoFilters.push("setpts=PTS-STARTPTS");
+ videoFilters.push(`setpts=${1 / speed}*PTS`);
  }
  // Apply scaling based on fill option
  if (fillOption.style === "aspectFill") {
@@ -41,9 +42,15 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOp
  `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
  }
  videoFilters.push("setsar=1", "format=yuv420p");
+ // Apply custom video filters if specified
+ if (filters && filters.length > 0) {
+ filters.forEach((filter) => {
+ videoFilters.push(convertVideoFilterToFFmpeg(filter));
+ });
+ }
  return {
  videoId,
- videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
+ videoPart: `[${inputIndex}:v]` + videoFilters.join(",") + `[${videoId}]`,
  };
  };
  export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
@@ -82,47 +89,138 @@ const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
  const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
  if (caption && beatsWithCaptions.length > 0) {
  const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
- return beatsWithCaptions.reduce((acc, beat, index) => {
+ return beatsWithCaptions.reduce((prevVideoId, beat, index) => {
  const { startAt, duration, captionFile } = beat;
  if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
  const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
  const compositeVideoId = `oc${index}`;
- ffmpegContext.filterComplex.push(`[${acc}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
  return compositeVideoId;
  }
- return acc;
+ return prevVideoId;
  }, concatVideoId);
  }
  return concatVideoId;
  };
- const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps) => {
- if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
- const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
- return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
- const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
- const processedVideoId = `${transitionVideoId}_f`;
- let transitionFilter;
- if (transition.type === "fade") {
- transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
- }
- else if (transition.type === "slideout_left") {
- transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
- }
- else {
- throw new Error(`Unknown transition type: ${transition.type}`);
- }
- ffmpegContext.filterComplex.push(transitionFilter);
- const outputId = `${transitionVideoId}_o`;
- if (transition.type === "fade") {
- ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ export const getOutOverlayCoords = (transitionType, d, t) => {
+ if (transitionType === "slideout_left") {
+ return `x='-(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slideout_right") {
+ return `x='(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slideout_up") {
+ return `x=0:y='-(t-${t})*H/${d}'`;
+ }
+ else if (transitionType === "slideout_down") {
+ return `x=0:y='(t-${t})*H/${d}'`;
+ }
+ throw new Error(`Unknown transition type: ${transitionType}`);
+ };
+ export const getInOverlayCoords = (transitionType, d, t) => {
+ if (transitionType === "slidein_left") {
+ return `x='-W+(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slidein_right") {
+ return `x='W-(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slidein_up") {
+ return `x=0:y='H-(t-${t})*H/${d}'`;
+ }
+ else if (transitionType === "slidein_down") {
+ return `x=0:y='-H+(t-${t})*H/${d}'`;
+ }
+ throw new Error(`Unknown transition type: ${transitionType}`);
+ };
+ const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats) => {
+ if (transitionVideoIds.length === 0) {
+ return captionedVideoId;
+ }
+ return transitionVideoIds.reduce((prevVideoId, { videoId: transitionVideoId, nextVideoId, beatIndex }) => {
+ const beat = context.studio.script.beats[beatIndex];
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ if (!transition) {
+ return prevVideoId; // Skip if no transition is defined
+ }
+ // Transition happens at the start of this beat
+ const startAt = beatTimestamps[beatIndex] - 0.05; // 0.05 is to avoid flickering
+ const duration = transition.duration;
+ const outputVideoId = `trans_${beatIndex}_o`;
+ const processedVideoId = `${transitionVideoId}_f`;
+ if (transition.type === "fade") {
+ // Fade out the previous beat's last frame
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${duration}:alpha=1,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("slideout_")) {
+ // Slideout: previous beat's last frame slides out
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=${getOutOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("slidein_")) {
+ // Slidein: this beat's first frame slides in over the previous beat's last frame
+ if (!nextVideoId) {
+ // Cannot apply slidein without first frame
+ return prevVideoId;
  }
- else if (transition.type === "slideout_left") {
- ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ // Get previous beat's last frame for background
+ const prevVideoSourceId = videoIdsForBeats[beatIndex - 1];
+ // Both movie and image beats now have _last
+ const prevLastFrame = `${prevVideoSourceId}_last`;
+ // Prepare background (last frame of previous beat)
+ const backgroundVideoId = `${prevLastFrame}_bg`;
+ ffmpegContext.filterComplex.push(`[${prevLastFrame}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${backgroundVideoId}]`);
+ // Prepare sliding frame (first frame of this beat)
+ const slideinFrameId = `${nextVideoId}_f`;
+ ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${slideinFrameId}]`);
+ // First overlay: put background on top of concat video
+ const bgOutputId = `${prevLastFrame}_bg_o`;
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${backgroundVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${bgOutputId}]`);
+ // Second overlay: slide in the new frame on top of background
+ ffmpegContext.filterComplex.push(`[${bgOutputId}][${slideinFrameId}]overlay=${getInOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("wipe")) {
+ // Wipe transition: use xfade filter between previous beat's last frame and this beat's first frame
+ if (!nextVideoId) {
+ // Cannot apply wipe without first frame
+ return prevVideoId;
  }
- return outputId;
- }, captionedVideoId);
- }
- return captionedVideoId;
+ // Use xfade offset instead of trimming to avoid framerate issues
+ // The static frames are created with proper duration, use offset to start transition at the right time
+ const prevBeatDuration = context.studio.beats[beatIndex - 1].duration ?? 0;
+ const xfadeOffset = prevBeatDuration - duration;
+ // Apply xfade with explicit pixel format
+ const xfadeOutputId = `${transitionVideoId}_xfade`;
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuv420p[${transitionVideoId}_fmt]`);
+ ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuv420p[${nextVideoId}_fmt]`);
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}_fmt][${nextVideoId}_fmt]xfade=transition=${transition.type}:duration=${duration}:offset=${xfadeOffset}[${xfadeOutputId}]`);
+ // Set PTS for overlay timing
+ const xfadeTimedId = `${xfadeOutputId}_t`;
+ ffmpegContext.filterComplex.push(`[${xfadeOutputId}]setpts=PTS-STARTPTS+${startAt}/TB[${xfadeTimedId}]`);
+ // Overlay the xfade result on the concat video
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${xfadeTimedId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else {
+ throw new Error(`Unknown transition type: ${transition.type}`);
+ }
+ return outputVideoId;
+ }, captionedVideoId);
+ };
+ export const getNeedFirstFrame = (context) => {
+ return context.studio.script.beats.map((beat, index) => {
+ if (index === 0)
+ return false; // First beat cannot have transition
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ return (transition?.type.startsWith("slidein_") || transition?.type.startsWith("wipe")) ?? false;
+ });
+ };
+ export const getNeedLastFrame = (context) => {
+ return context.studio.script.beats.map((beat, index) => {
+ if (index === context.studio.script.beats.length - 1)
+ return false; // Last beat doesn't need _last
+ const nextTransition = MulmoPresentationStyleMethods.getMovieTransition(context, context.studio.script.beats[index + 1]);
+ return nextTransition !== null; // Any transition on next beat requires this beat's last frame
+ });
  };
  const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
  if (audioIdsFromMovieBeats.length > 0) {
@@ -135,17 +233,96 @@ const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMov
  }
  return artifactAudioId;
  };
- const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
- const caption = MulmoStudioContextMethods.getCaption(context);
- const start = performance.now();
- const ffmpegContext = FfmpegContextInit();
- const missingIndex = context.studio.beats.findIndex((studioBeat, index) => {
+ export const getExtraPadding = (context, index) => {
+ // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
+ if (index === 0) {
+ return MulmoStudioContextMethods.getIntroPadding(context);
+ }
+ else if (index === context.studio.beats.length - 1) {
+ return context.presentationStyle.audioParams.outroPadding;
+ }
+ return 0;
+ };
+ export const getFillOption = (context, beat) => {
+ // Get fillOption from merged imageParams (global + beat-specific)
+ const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+ const beatFillOption = beat.movieParams?.fillOption;
+ const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+ return { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ };
+ export const getTransitionVideoId = (transition, videoIdsForBeats, index) => {
+ if (transition.type === "fade" || transition.type.startsWith("slideout_")) {
+ // Use previous beat's last frame. TODO: support voice-over
+ const prevVideoSourceId = videoIdsForBeats[index - 1];
+ // Both movie and image beats now have _last
+ const frameId = `${prevVideoSourceId}_last`;
+ return { videoId: frameId, nextVideoId: undefined, beatIndex: index };
+ }
+ if (transition.type.startsWith("wipe")) {
+ // Wipe needs both previous beat's last frame and this beat's first frame
+ const prevVideoSourceId = videoIdsForBeats[index - 1];
+ const prevLastFrame = `${prevVideoSourceId}_last`;
+ const nextFirstFrame = `${videoIdsForBeats[index]}_first`;
+ return { videoId: prevLastFrame, nextVideoId: nextFirstFrame, beatIndex: index };
+ }
+ // Use this beat's first frame. slidein_ case
+ return { videoId: "", nextVideoId: `${videoIdsForBeats[index]}_first`, beatIndex: index };
+ };
+ export const getConcatVideoFilter = (concatVideoId, videoIdsForBeats) => {
+ const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
+ return `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
+ };
+ export const validateBeatSource = (studioBeat, index) => {
+ const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
+ assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
+ assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
+ assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
+ return sourceFile;
+ };
+ export const addSplitAndExtractFrames = (ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo) => {
+ const outputs = [`[${videoId}]`];
+ if (needFirst)
+ outputs.push(`[${videoId}_first_src]`);
+ if (needLast)
+ outputs.push(`[${videoId}_last_src]`);
+ ffmpegContext.filterComplex.push(`[${videoId}]split=${outputs.length}${outputs.join("")}`);
+ if (needFirst) {
+ // Create static frame using nullsrc as base for proper framerate/timebase
+ // Note: setpts must NOT be used here as it loses framerate metadata needed by xfade
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_first_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_first_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_first_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_first_null][${videoId}_first_frame]overlay=format=auto,fps=30[${videoId}_first]`);
+ }
+ if (needLast) {
+ if (isMovie) {
+ // Movie beats: extract actual last frame
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_src]reverse,select='eq(n,0)',reverse,scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+ }
+ else {
+ // Image beats: all frames are identical, so just select one
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+ }
+ }
+ };
+ const findMissingIndex = (context) => {
+ return context.studio.beats.findIndex((studioBeat, index) => {
  const beat = context.studio.script.beats[index];
  if (beat.image?.type === "voice_over") {
  return false; // Voice-over does not have either imageFile or movieFile.
  }
  return !studioBeat.imageFile && !studioBeat.movieFile;
  });
+ };
+ export const createVideo = async (audioArtifactFilePath, outputVideoPath, context, isTest = false) => {
+ const caption = MulmoStudioContextMethods.getCaption(context);
+ const start = performance.now();
+ const ffmpegContext = FfmpegContextInit();
+ const missingIndex = findMissingIndex(context);
  if (missingIndex !== -1) {
  GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
  return false;
@@ -156,6 +333,10 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  const audioIdsFromMovieBeats = [];
  const transitionVideoIds = [];
  const beatTimestamps = [];
+ // Check which beats need _first (for slidein transition on this beat)
+ const needsFirstFrame = getNeedFirstFrame(context);
+ // Check which beats need _last (for any transition on next beat - they all need previous beat's last frame)
+ const needsLastFrame = getNeedLastFrame(context);
  context.studio.beats.reduce((timestamp, studioBeat, index) => {
  const beat = context.studio.script.beats[index];
  if (beat.image?.type === "voice_over") {
@@ -163,47 +344,29 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  beatTimestamps.push(timestamp);
  return timestamp; // Skip voice-over beats.
  }
- const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
- assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
- assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
- assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
- const extraPadding = (() => {
- // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
- if (index === 0) {
- return MulmoStudioContextMethods.getIntroPadding(context);
- }
- else if (index === context.studio.beats.length - 1) {
- return context.presentationStyle.audioParams.outroPadding;
- }
- return 0;
- })();
+ const sourceFile = isTest ? "/test/dummy.mp4" : validateBeatSource(studioBeat, index);
  // The movie duration is bigger in case of voice-over.
- const duration = Math.max(studioBeat.duration + extraPadding, studioBeat.movieDuration ?? 0);
- // Get fillOption from merged imageParams (global + beat-specific)
- const globalFillOption = context.presentationStyle.movieParams?.fillOption;
- const beatFillOption = beat.movieParams?.fillOption;
- const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
- const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ const duration = Math.max(studioBeat.duration + getExtraPadding(context, index), studioBeat.movieDuration ?? 0);
  const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
- const mediaType = studioBeat.lipSyncFile || studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
+ const isMovie = !!(studioBeat.lipSyncFile ||
+ studioBeat.movieFile ||
+ MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat) === "movie");
  const speed = beat.movieParams?.speed ?? 1.0;
- const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
+ const filters = beat.movieParams?.filters;
+ const { videoId, videoPart } = getVideoPart(inputIndex, isMovie, duration, canvasInfo, getFillOption(context, beat), speed, filters);
  ffmpegContext.filterComplex.push(videoPart);
- if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
- // NOTE: We split the video into two parts for transition.
- ffmpegContext.filterComplex.push(`[${videoId}]split=2[${videoId}_0][${videoId}_1]`);
- videoIdsForBeats.push(`${videoId}_0`);
- if (mediaType === "movie") {
- // For movie beats, extract the last frame for transition
- ffmpegContext.filterComplex.push(`[${videoId}_1]reverse,select='eq(n,0)',reverse,tpad=stop_mode=clone:stop_duration=${duration},fps=30,setpts=PTS-STARTPTS[${videoId}_2]`);
- transitionVideoIds.push(`${videoId}_2`);
- }
- else {
- transitionVideoIds.push(`${videoId}_1`);
- }
+ // for transition
+ const needFirst = needsFirstFrame[index]; // This beat has slidein
+ const needLast = needsLastFrame[index]; // Next beat has transition
+ videoIdsForBeats.push(videoId);
+ if (needFirst || needLast) {
+ addSplitAndExtractFrames(ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo);
  }
- else {
- videoIdsForBeats.push(videoId);
+ // Record transition info if this beat has a transition
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ if (transition && index > 0) {
+ const transitionVideoId = getTransitionVideoId(transition, videoIdsForBeats, index);
+ transitionVideoIds.push(transitionVideoId);
  }
  // NOTE: We don't support audio if the speed is not 1.0.
  const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
@@ -218,23 +381,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  }, 0);
  assert(videoIdsForBeats.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
  assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
- // console.log("*** images", images.audioIds);
  // Concatenate the trimmed images
  const concatVideoId = "concat_video";
- const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
- const inputs = videoIds.map((id) => `[${id}]`).join("");
- const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
- ffmpegContext.filterComplex.push(filter);
+ ffmpegContext.filterComplex.push(getConcatVideoFilter(concatVideoId, videoIdsForBeats));
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
- const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
+ const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats);
+ if (isTest) {
+ return ffmpegContext.filterComplex;
+ }
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
- const artifactAudioId = `${audioIndex}:a`;
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, artifactAudioId, audioIdsFromMovieBeats);
- // GraphAILogger.debug("filterComplex", ffmpegContext.filterComplex);
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
- const end = performance.now();
- GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
+ const endTime = performance.now();
+ GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
  GraphAILogger.info(context.studio.script.title);
  GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
  return true;
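
Two of the newly exported helpers are easy to sanity-check in isolation. getConcatVideoFilter skips the undefined slots that voice-over beats leave in videoIdsForBeats before building the concat filter, and createVideo gains an isTest flag that returns the assembled filter_complex array instead of rendering (hence the Promise<boolean | string[]> return type in the declaration diff). A rough sketch of the former, with the import path assumed:

import { getConcatVideoFilter } from "mulmocast"; // import path assumed, not shown in this diff

// Beat 1 is a voice-over beat, so its slot is undefined and is filtered out of the concat.
const filter = getConcatVideoFilter("concat_video", ["v0", undefined, "v2"]);
// -> "[v0][v2]concat=n=2:v=1:a=0[concat_video]"
console.log(filter);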