mulmocast 2.0.8 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/lib/actions/movie.d.ts +29 -2
  2. package/lib/actions/movie.js +243 -91
  3. package/lib/methods/mulmo_presentation_style.d.ts +4 -3
  4. package/lib/methods/mulmo_presentation_style.js +7 -1
  5. package/lib/types/schema.d.ts +202 -31
  6. package/lib/types/schema.js +27 -13
  7. package/lib/types/schema_video_filter.d.ts +423 -0
  8. package/lib/types/schema_video_filter.js +253 -0
  9. package/lib/types/type.d.ts +2 -1
  10. package/lib/utils/context.d.ts +11 -3
  11. package/lib/utils/ffmpeg_utils.js +2 -2
  12. package/lib/utils/provider2agent.d.ts +4 -0
  13. package/lib/utils/provider2agent.js +5 -0
  14. package/lib/utils/utils.js +24 -55
  15. package/lib/utils/video_filter.d.ts +7 -0
  16. package/lib/utils/video_filter.js +149 -0
  17. package/package.json +7 -7
  18. package/scripts/test/README.md +48 -48
  19. package/scripts/test/test_transition2.json +460 -0
  20. package/scripts/test/test_transition2.json~ +62 -0
  21. package/scripts/test/test_transition3.json +70 -0
  22. package/scripts/test/test_transition3.json~ +76 -0
  23. package/scripts/test/test_transition_no_audio.json +16 -0
  24. package/scripts/test/test_video_filters.json~ +227 -0
  25. package/scripts/test/test_wipe_simple.json +37 -0
  26. package/scripts/test/test_all_image.json~ +0 -45
  27. package/scripts/test/test_all_movie.json~ +0 -37
  28. package/scripts/test/test_all_tts.json~ +0 -83
  29. package/scripts/test/test_audio_gemini.json~ +0 -67
  30. package/scripts/test/test_genai2.json~ +0 -84
  31. package/scripts/test/test_genai_movie.json~ +0 -22
  32. package/scripts/test/test_kotodama.json~ +0 -0
  33. package/scripts/test/test_lipsync2.json~ +0 -24
  34. package/scripts/test/test_movie2.json~ +0 -40
  35. package/scripts/test/test_play_to_end.json~ +0 -65
package/lib/actions/movie.d.ts

@@ -1,5 +1,7 @@
- import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
- export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number) => {
+ import { MulmoStudioContext, MulmoBeat, MulmoTransition, MulmoCanvasDimension, MulmoFillOption } from "../types/index.js";
+ import { FfmpegContext } from "../utils/ffmpeg_utils.js";
+ type VideoId = string | undefined;
+ export declare const getVideoPart: (inputIndex: number, isMovie: boolean, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number) => {
  videoId: string;
  videoPart: string;
  };
@@ -7,5 +9,30 @@ export declare const getAudioPart: (inputIndex: number, duration: number, delay:
  audioId: string;
  audioPart: string;
  };
+ export declare const getOutOverlayCoords: (transitionType: string, d: number, t: number) => string;
+ export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
+ export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
+ export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
+ export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
+ export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
+ style: "aspectFit" | "aspectFill";
+ };
+ export declare const getTransitionVideoId: (transition: MulmoTransition, videoIdsForBeats: VideoId[], index: number) => {
+ videoId: string;
+ nextVideoId: undefined;
+ beatIndex: number;
+ } | {
+ videoId: string;
+ nextVideoId: string;
+ beatIndex: number;
+ };
+ export declare const getConcatVideoFilter: (concatVideoId: string, videoIdsForBeats: VideoId[]) => string;
+ export declare const validateBeatSource: (studioBeat: MulmoStudioContext["studio"]["beats"][number], index: number) => string;
+ export declare const addSplitAndExtractFrames: (ffmpegContext: FfmpegContext, videoId: string, duration: number, isMovie: boolean, needFirst: boolean, needLast: boolean, canvasInfo: {
+ width: number;
+ height: number;
+ }) => void;
+ export declare const createVideo: (audioArtifactFilePath: string, outputVideoPath: string, context: MulmoStudioContext, isTest?: boolean) => Promise<boolean | string[]>;
  export declare const movieFilePath: (context: MulmoStudioContext) => string;
  export declare const movie: (context: MulmoStudioContext) => Promise<boolean>;
+ export {};
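
The new exports above widen the transition surface from two types to seventeen and expose the frame-extraction helpers for testing. For orientation, a beat-level transition is declared through movieParams, since getMovieTransition (later in this diff) reads beat.movieParams?.transition before falling back to the presentation style. This is a minimal sketch; the surrounding beat fields are illustrative only:

    // Hypothetical MulmoScript beat using a per-beat transition (sketch):
    const beat = {
      text: "Second beat", // illustrative field
      movieParams: {
        transition: { type: "slidein_left", duration: 0.5 }, // one of the 17 types in this release
      },
    };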

package/lib/actions/movie.js

@@ -1,5 +1,5 @@
  import { GraphAILogger, assert } from "graphai";
- import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
+ import { mulmoFillOptionSchema } from "../types/index.js";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage, isFile } from "../utils/file.js";
  import { createVideoFileError, createVideoSourceError } from "../utils/error_cause.js";
@@ -7,27 +7,27 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAud
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
- export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption, speed) => {
+ export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed) => {
  const videoId = `v${inputIndex}`;
  const videoFilters = [];
  // Handle different media types
  const originalDuration = duration * speed;
- if (mediaType === "image") {
- videoFilters.push("loop=loop=-1:size=1:start=0");
- }
- else if (mediaType === "movie") {
+ if (isMovie) {
  // For videos, extend with last frame if shorter than required duration
  // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
  videoFilters.push(`tpad=stop_mode=clone:stop_duration=${originalDuration * 2}`); // Use 2x duration to ensure coverage
  }
+ else {
+ videoFilters.push("loop=loop=-1:size=1:start=0");
+ }
  // Common filters for all media types
  videoFilters.push(`trim=duration=${originalDuration}`, "fps=30");
  // Apply speed if specified
- if (speed !== 1.0) {
- videoFilters.push(`setpts=${1 / speed}*PTS`);
+ if (speed === 1.0) {
+ videoFilters.push("setpts=PTS-STARTPTS");
  }
  else {
- videoFilters.push("setpts=PTS-STARTPTS");
+ videoFilters.push(`setpts=${1 / speed}*PTS`);
  }
  // Apply scaling based on fill option
  if (fillOption.style === "aspectFill") {
@@ -43,7 +43,7 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOp
  videoFilters.push("setsar=1", "format=yuv420p");
  return {
  videoId,
- videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
+ videoPart: `[${inputIndex}:v]` + videoFilters.join(",") + `[${videoId}]`,
  };
  };
  export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
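
To make the rewritten branches concrete: for an image beat (isMovie = false) with duration 3 and speed 1.0, the chain assembled above, before the fill-option scaling filters that this hunk elides, would read as sketched here (derived from the code, not captured output):

    // [0:v]loop=loop=-1:size=1:start=0,trim=duration=3,fps=30,setpts=PTS-STARTPTS,...[v0]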
@@ -82,47 +82,138 @@ const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
  const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
  if (caption && beatsWithCaptions.length > 0) {
  const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
- return beatsWithCaptions.reduce((acc, beat, index) => {
+ return beatsWithCaptions.reduce((prevVideoId, beat, index) => {
  const { startAt, duration, captionFile } = beat;
  if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
  const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
  const compositeVideoId = `oc${index}`;
- ffmpegContext.filterComplex.push(`[${acc}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
  return compositeVideoId;
  }
- return acc;
+ return prevVideoId;
  }, concatVideoId);
  }
  return concatVideoId;
  };
- const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps) => {
- if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
- const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
- return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
- const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
- const processedVideoId = `${transitionVideoId}_f`;
- let transitionFilter;
- if (transition.type === "fade") {
- transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
- }
- else if (transition.type === "slideout_left") {
- transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
- }
- else {
- throw new Error(`Unknown transition type: ${transition.type}`);
- }
- ffmpegContext.filterComplex.push(transitionFilter);
- const outputId = `${transitionVideoId}_o`;
- if (transition.type === "fade") {
- ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ export const getOutOverlayCoords = (transitionType, d, t) => {
+ if (transitionType === "slideout_left") {
+ return `x='-(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slideout_right") {
+ return `x='(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slideout_up") {
+ return `x=0:y='-(t-${t})*H/${d}'`;
+ }
+ else if (transitionType === "slideout_down") {
+ return `x=0:y='(t-${t})*H/${d}'`;
+ }
+ throw new Error(`Unknown transition type: ${transitionType}`);
+ };
+ export const getInOverlayCoords = (transitionType, d, t) => {
+ if (transitionType === "slidein_left") {
+ return `x='-W+(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slidein_right") {
+ return `x='W-(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slidein_up") {
+ return `x=0:y='H-(t-${t})*H/${d}'`;
+ }
+ else if (transitionType === "slidein_down") {
+ return `x=0:y='-H+(t-${t})*H/${d}'`;
+ }
+ throw new Error(`Unknown transition type: ${transitionType}`);
+ };
+ const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats) => {
+ if (transitionVideoIds.length === 0) {
+ return captionedVideoId;
+ }
+ return transitionVideoIds.reduce((prevVideoId, { videoId: transitionVideoId, nextVideoId, beatIndex }) => {
+ const beat = context.studio.script.beats[beatIndex];
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ if (!transition) {
+ return prevVideoId; // Skip if no transition is defined
+ }
+ // Transition happens at the start of this beat
+ const startAt = beatTimestamps[beatIndex] - 0.05; // 0.05 is to avoid flickering
+ const duration = transition.duration;
+ const outputVideoId = `trans_${beatIndex}_o`;
+ const processedVideoId = `${transitionVideoId}_f`;
+ if (transition.type === "fade") {
+ // Fade out the previous beat's last frame
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${duration}:alpha=1,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("slideout_")) {
+ // Slideout: previous beat's last frame slides out
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=${getOutOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("slidein_")) {
+ // Slidein: this beat's first frame slides in over the previous beat's last frame
+ if (!nextVideoId) {
+ // Cannot apply slidein without first frame
+ return prevVideoId;
  }
- else if (transition.type === "slideout_left") {
- ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ // Get previous beat's last frame for background
+ const prevVideoSourceId = videoIdsForBeats[beatIndex - 1];
+ // Both movie and image beats now have _last
+ const prevLastFrame = `${prevVideoSourceId}_last`;
+ // Prepare background (last frame of previous beat)
+ const backgroundVideoId = `${prevLastFrame}_bg`;
+ ffmpegContext.filterComplex.push(`[${prevLastFrame}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${backgroundVideoId}]`);
+ // Prepare sliding frame (first frame of this beat)
+ const slideinFrameId = `${nextVideoId}_f`;
+ ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${slideinFrameId}]`);
+ // First overlay: put background on top of concat video
+ const bgOutputId = `${prevLastFrame}_bg_o`;
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${backgroundVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${bgOutputId}]`);
+ // Second overlay: slide in the new frame on top of background
+ ffmpegContext.filterComplex.push(`[${bgOutputId}][${slideinFrameId}]overlay=${getInOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("wipe")) {
+ // Wipe transition: use xfade filter between previous beat's last frame and this beat's first frame
+ if (!nextVideoId) {
+ // Cannot apply wipe without first frame
+ return prevVideoId;
  }
- return outputId;
- }, captionedVideoId);
- }
- return captionedVideoId;
+ // Use xfade offset instead of trimming to avoid framerate issues
+ // The static frames are created with proper duration, use offset to start transition at the right time
+ const prevBeatDuration = context.studio.beats[beatIndex - 1].duration ?? 0;
+ const xfadeOffset = prevBeatDuration - duration;
+ // Apply xfade with explicit pixel format
+ const xfadeOutputId = `${transitionVideoId}_xfade`;
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuv420p[${transitionVideoId}_fmt]`);
+ ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuv420p[${nextVideoId}_fmt]`);
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}_fmt][${nextVideoId}_fmt]xfade=transition=${transition.type}:duration=${duration}:offset=${xfadeOffset}[${xfadeOutputId}]`);
+ // Set PTS for overlay timing
+ const xfadeTimedId = `${xfadeOutputId}_t`;
+ ffmpegContext.filterComplex.push(`[${xfadeOutputId}]setpts=PTS-STARTPTS+${startAt}/TB[${xfadeTimedId}]`);
+ // Overlay the xfade result on the concat video
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${xfadeTimedId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else {
+ throw new Error(`Unknown transition type: ${transition.type}`);
+ }
+ return outputVideoId;
+ }, captionedVideoId);
+ };
+ export const getNeedFirstFrame = (context) => {
+ return context.studio.script.beats.map((beat, index) => {
+ if (index === 0)
+ return false; // First beat cannot have transition
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ return (transition?.type.startsWith("slidein_") || transition?.type.startsWith("wipe")) ?? false;
+ });
+ };
+ export const getNeedLastFrame = (context) => {
+ return context.studio.script.beats.map((beat, index) => {
+ if (index === context.studio.script.beats.length - 1)
+ return false; // Last beat doesn't need _last
+ const nextTransition = MulmoPresentationStyleMethods.getMovieTransition(context, context.studio.script.beats[index + 1]);
+ return nextTransition !== null; // Any transition on next beat requires this beat's last frame
+ });
  };
  const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
  if (audioIdsFromMovieBeats.length > 0) {
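
As a worked example of the coordinate helpers, derived directly from their template strings: getOutOverlayCoords("slideout_left", 0.5, 12) yields

    x='-(t-12)*W/0.5':y=0

so between t=12 and t=12.5 the overlaid frame moves from x=0 to x=-W, sliding off the left edge, while getInOverlayCoords("slidein_left", 0.5, 12) yields x='-W+(t-12)*W/0.5':y=0, carrying the incoming frame from -W to 0 over the same window.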
@@ -135,17 +226,96 @@ const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMov
  }
  return artifactAudioId;
  };
- const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
- const caption = MulmoStudioContextMethods.getCaption(context);
- const start = performance.now();
- const ffmpegContext = FfmpegContextInit();
- const missingIndex = context.studio.beats.findIndex((studioBeat, index) => {
+ export const getExtraPadding = (context, index) => {
+ // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
+ if (index === 0) {
+ return MulmoStudioContextMethods.getIntroPadding(context);
+ }
+ else if (index === context.studio.beats.length - 1) {
+ return context.presentationStyle.audioParams.outroPadding;
+ }
+ return 0;
+ };
+ export const getFillOption = (context, beat) => {
+ // Get fillOption from merged imageParams (global + beat-specific)
+ const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+ const beatFillOption = beat.movieParams?.fillOption;
+ const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+ return { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ };
+ export const getTransitionVideoId = (transition, videoIdsForBeats, index) => {
+ if (transition.type === "fade" || transition.type.startsWith("slideout_")) {
+ // Use previous beat's last frame. TODO: support voice-over
+ const prevVideoSourceId = videoIdsForBeats[index - 1];
+ // Both movie and image beats now have _last
+ const frameId = `${prevVideoSourceId}_last`;
+ return { videoId: frameId, nextVideoId: undefined, beatIndex: index };
+ }
+ if (transition.type.startsWith("wipe")) {
+ // Wipe needs both previous beat's last frame and this beat's first frame
+ const prevVideoSourceId = videoIdsForBeats[index - 1];
+ const prevLastFrame = `${prevVideoSourceId}_last`;
+ const nextFirstFrame = `${videoIdsForBeats[index]}_first`;
+ return { videoId: prevLastFrame, nextVideoId: nextFirstFrame, beatIndex: index };
+ }
+ // Use this beat's first frame. slidein_ case
+ return { videoId: "", nextVideoId: `${videoIdsForBeats[index]}_first`, beatIndex: index };
+ };
+ export const getConcatVideoFilter = (concatVideoId, videoIdsForBeats) => {
+ const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
+ return `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
+ };
+ export const validateBeatSource = (studioBeat, index) => {
+ const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
+ assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
+ assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
+ assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
+ return sourceFile;
+ };
+ export const addSplitAndExtractFrames = (ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo) => {
+ const outputs = [`[${videoId}]`];
+ if (needFirst)
+ outputs.push(`[${videoId}_first_src]`);
+ if (needLast)
+ outputs.push(`[${videoId}_last_src]`);
+ ffmpegContext.filterComplex.push(`[${videoId}]split=${outputs.length}${outputs.join("")}`);
+ if (needFirst) {
+ // Create static frame using nullsrc as base for proper framerate/timebase
+ // Note: setpts must NOT be used here as it loses framerate metadata needed by xfade
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_first_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_first_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_first_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_first_null][${videoId}_first_frame]overlay=format=auto,fps=30[${videoId}_first]`);
+ }
+ if (needLast) {
+ if (isMovie) {
+ // Movie beats: extract actual last frame
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_src]reverse,select='eq(n,0)',reverse,scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+ }
+ else {
+ // Image beats: all frames are identical, so just select one
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+ }
+ }
+ };
+ const findMissingIndex = (context) => {
+ return context.studio.beats.findIndex((studioBeat, index) => {
  const beat = context.studio.script.beats[index];
  if (beat.image?.type === "voice_over") {
  return false; // Voice-over does not have either imageFile or movieFile.
  }
  return !studioBeat.imageFile && !studioBeat.movieFile;
  });
+ };
+ export const createVideo = async (audioArtifactFilePath, outputVideoPath, context, isTest = false) => {
+ const caption = MulmoStudioContextMethods.getCaption(context);
+ const start = performance.now();
+ const ffmpegContext = FfmpegContextInit();
+ const missingIndex = findMissingIndex(context);
  if (missingIndex !== -1) {
  GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
  return false;
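
For illustration, getConcatVideoFilter drops the undefined slots left by voice-over beats, so videoIdsForBeats = ["v0", "v1", undefined, "v3"] produces (a derived example, not output captured from the package):

    [v0][v1][v3]concat=n=3:v=1:a=0[concat_video]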
@@ -156,6 +326,10 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  const audioIdsFromMovieBeats = [];
  const transitionVideoIds = [];
  const beatTimestamps = [];
+ // Check which beats need _first (for slidein transition on this beat)
+ const needsFirstFrame = getNeedFirstFrame(context);
+ // Check which beats need _last (for any transition on next beat - they all need previous beat's last frame)
+ const needsLastFrame = getNeedLastFrame(context);
  context.studio.beats.reduce((timestamp, studioBeat, index) => {
  const beat = context.studio.script.beats[index];
  if (beat.image?.type === "voice_over") {
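
To see how these two arrays line up, take a hypothetical three-beat script where beat 1 declares a fade and beat 2 a slidein_left:

    // getNeedFirstFrame(context) -> [false, false, true]  // only the slidein beat needs its own first frame
    // getNeedLastFrame(context)  -> [true, true, false]   // beats 0 and 1 each precede a transition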
@@ -163,47 +337,28 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  beatTimestamps.push(timestamp);
  return timestamp; // Skip voice-over beats.
  }
- const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
- assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
- assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
- assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
- const extraPadding = (() => {
- // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
- if (index === 0) {
- return MulmoStudioContextMethods.getIntroPadding(context);
- }
- else if (index === context.studio.beats.length - 1) {
- return context.presentationStyle.audioParams.outroPadding;
- }
- return 0;
- })();
+ const sourceFile = isTest ? "/test/dummy.mp4" : validateBeatSource(studioBeat, index);
  // The movie duration is bigger in case of voice-over.
- const duration = Math.max(studioBeat.duration + extraPadding, studioBeat.movieDuration ?? 0);
- // Get fillOption from merged imageParams (global + beat-specific)
- const globalFillOption = context.presentationStyle.movieParams?.fillOption;
- const beatFillOption = beat.movieParams?.fillOption;
- const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
- const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ const duration = Math.max(studioBeat.duration + getExtraPadding(context, index), studioBeat.movieDuration ?? 0);
  const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
- const mediaType = studioBeat.lipSyncFile || studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
+ const isMovie = !!(studioBeat.lipSyncFile ||
+ studioBeat.movieFile ||
+ MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat) === "movie");
  const speed = beat.movieParams?.speed ?? 1.0;
- const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
+ const { videoId, videoPart } = getVideoPart(inputIndex, isMovie, duration, canvasInfo, getFillOption(context, beat), speed);
  ffmpegContext.filterComplex.push(videoPart);
- if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
- // NOTE: We split the video into two parts for transition.
- ffmpegContext.filterComplex.push(`[${videoId}]split=2[${videoId}_0][${videoId}_1]`);
- videoIdsForBeats.push(`${videoId}_0`);
- if (mediaType === "movie") {
- // For movie beats, extract the last frame for transition
- ffmpegContext.filterComplex.push(`[${videoId}_1]reverse,select='eq(n,0)',reverse,tpad=stop_mode=clone:stop_duration=${duration},fps=30,setpts=PTS-STARTPTS[${videoId}_2]`);
- transitionVideoIds.push(`${videoId}_2`);
- }
- else {
- transitionVideoIds.push(`${videoId}_1`);
- }
+ // for transition
+ const needFirst = needsFirstFrame[index]; // This beat has slidein
+ const needLast = needsLastFrame[index]; // Next beat has transition
+ videoIdsForBeats.push(videoId);
+ if (needFirst || needLast) {
+ addSplitAndExtractFrames(ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo);
  }
- else {
- videoIdsForBeats.push(videoId);
+ // Record transition info if this beat has a transition
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ if (transition && index > 0) {
+ const transitionVideoId = getTransitionVideoId(transition, videoIdsForBeats, index);
+ transitionVideoIds.push(transitionVideoId);
  }
  // NOTE: We don't support audio if the speed is not 1.0.
  const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
@@ -218,23 +373,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  }, 0);
  assert(videoIdsForBeats.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
  assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
- // console.log("*** images", images.audioIds);
  // Concatenate the trimmed images
  const concatVideoId = "concat_video";
- const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
- const inputs = videoIds.map((id) => `[${id}]`).join("");
- const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
- ffmpegContext.filterComplex.push(filter);
+ ffmpegContext.filterComplex.push(getConcatVideoFilter(concatVideoId, videoIdsForBeats));
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
- const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
+ const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats);
+ if (isTest) {
+ return ffmpegContext.filterComplex;
+ }
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
- const artifactAudioId = `${audioIndex}:a`;
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, artifactAudioId, audioIdsFromMovieBeats);
- // GraphAILogger.debug("filterComplex", ffmpegContext.filterComplex);
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
- const end = performance.now();
- GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
+ const endTime = performance.now();
+ GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
  GraphAILogger.info(context.studio.script.title);
  GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
  return true;
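
The isTest path above returns the accumulated filter_complex entries without touching ffmpeg, which is presumably what the new scripts/test fixtures exercise. A minimal sketch of that usage, with hypothetical paths and an assumed prebuilt context:

    // Returns string[] of filter_complex lines instead of rendering:
    const filters = await createVideo("/tmp/audio.m4a", "/tmp/out.mp4", context, true);
    console.log(Array.isArray(filters) ? filters.join("\n") : filters);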

package/lib/methods/mulmo_presentation_style.d.ts

@@ -3,13 +3,14 @@
  * (No Node.js built-ins like fs, path, dotenv, etc.)
  * Works in both Node.js and modern browsers.
  */
- import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
+ import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext, MulmoTransition } from "../types/index.js";
  export declare const MulmoPresentationStyleMethods: {
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
  getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
  getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
  getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
  getSpeaker(context: MulmoStudioContext, beat: MulmoBeat, targetLang: string | undefined): SpeakerData;
+ getMovieTransition(context: MulmoStudioContext, beat: MulmoBeat): MulmoTransition | null;
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
  getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -20,11 +21,11 @@ export declare const MulmoPresentationStyleMethods: {
  fillOption?: {
  style: "aspectFit" | "aspectFill";
  } | undefined;
- speed?: number | undefined;
  transition?: {
- type: "fade" | "slideout_left";
+ type: "fade" | "slideout_left" | "slideout_right" | "slideout_up" | "slideout_down" | "slidein_left" | "slidein_right" | "slidein_up" | "slidein_down" | "wipeleft" | "wiperight" | "wipeup" | "wipedown" | "wipetl" | "wipetr" | "wipebl" | "wipebr";
  duration: number;
  } | undefined;
+ speed?: number | undefined;
  };
  keyName: string;
  };

package/lib/methods/mulmo_presentation_style.js

@@ -5,7 +5,7 @@
  */
  import { isNull } from "graphai";
  import { userAssert } from "../utils/utils.js";
- import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
+ import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
  import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../utils/provider2agent.js";
  const defaultTextSlideStyles = [
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
@@ -63,6 +63,12 @@ export const MulmoPresentationStyleMethods = {
  }
  return speaker;
  },
+ getMovieTransition(context, beat) {
+ const transitionData = beat.movieParams?.transition ?? context.presentationStyle.movieParams?.transition;
+ if (!transitionData)
+ return null;
+ return mulmoTransitionSchema.parse(transitionData);
+ },
  /* NOTE: This method is not used.
  getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
  const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
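
The nullish-coalescing lookup in getMovieTransition means a beat-level transition always wins over the presentation-style default, with the schema parse validating the type and duration. A small derived sketch (object shapes abbreviated):

    // beat:  { movieParams: { transition: { type: "fade", duration: 0.3 } } }
    // style: { movieParams: { transition: { type: "wipeleft", duration: 1 } } }
    // getMovieTransition(context, beat) -> { type: "fade", duration: 0.3 }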