mulmocast 2.0.8 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/lib/actions/movie.d.ts +29 -2
  2. package/lib/actions/movie.js +243 -91
  3. package/lib/methods/mulmo_presentation_style.d.ts +4 -3
  4. package/lib/methods/mulmo_presentation_style.js +7 -1
  5. package/lib/types/schema.d.ts +202 -31
  6. package/lib/types/schema.js +27 -13
  7. package/lib/types/schema_video_filter.d.ts +423 -0
  8. package/lib/types/schema_video_filter.js +253 -0
  9. package/lib/types/type.d.ts +2 -1
  10. package/lib/utils/context.d.ts +11 -3
  11. package/lib/utils/ffmpeg_utils.js +2 -2
  12. package/lib/utils/provider2agent.d.ts +4 -0
  13. package/lib/utils/provider2agent.js +5 -0
  14. package/lib/utils/utils.js +24 -55
  15. package/lib/utils/video_filter.d.ts +7 -0
  16. package/lib/utils/video_filter.js +149 -0
  17. package/package.json +7 -7
  18. package/scripts/test/README.md +48 -48
  19. package/scripts/test/test_transition2.json +460 -0
  20. package/scripts/test/test_transition2.json~ +62 -0
  21. package/scripts/test/test_transition3.json +70 -0
  22. package/scripts/test/test_transition3.json~ +76 -0
  23. package/scripts/test/test_transition_no_audio.json +16 -0
  24. package/scripts/test/test_video_filters.json~ +227 -0
  25. package/scripts/test/test_wipe_simple.json +37 -0
  26. package/scripts/test/test_all_image.json~ +0 -45
  27. package/scripts/test/test_all_movie.json~ +0 -37
  28. package/scripts/test/test_all_tts.json~ +0 -83
  29. package/scripts/test/test_audio_gemini.json~ +0 -67
  30. package/scripts/test/test_genai2.json~ +0 -84
  31. package/scripts/test/test_genai_movie.json~ +0 -22
  32. package/scripts/test/test_kotodama.json~ +0 -0
  33. package/scripts/test/test_lipsync2.json~ +0 -24
  34. package/scripts/test/test_movie2.json~ +0 -40
  35. package/scripts/test/test_play_to_end.json~ +0 -65
package/lib/actions/movie.d.ts

@@ -1,5 +1,7 @@
- import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
- export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number) => {
+ import { MulmoStudioContext, MulmoBeat, MulmoTransition, MulmoCanvasDimension, MulmoFillOption } from "../types/index.js";
+ import { FfmpegContext } from "../utils/ffmpeg_utils.js";
+ type VideoId = string | undefined;
+ export declare const getVideoPart: (inputIndex: number, isMovie: boolean, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number) => {
  videoId: string;
  videoPart: string;
  };
@@ -7,5 +9,30 @@ export declare const getAudioPart: (inputIndex: number, duration: number, delay:
  audioId: string;
  audioPart: string;
  };
+ export declare const getOutOverlayCoords: (transitionType: string, d: number, t: number) => string;
+ export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
+ export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
+ export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
+ export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
+ export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
+ style: "aspectFit" | "aspectFill";
+ };
+ export declare const getTransitionVideoId: (transition: MulmoTransition, videoIdsForBeats: VideoId[], index: number) => {
+ videoId: string;
+ nextVideoId: undefined;
+ beatIndex: number;
+ } | {
+ videoId: string;
+ nextVideoId: string;
+ beatIndex: number;
+ };
+ export declare const getConcatVideoFilter: (concatVideoId: string, videoIdsForBeats: VideoId[]) => string;
+ export declare const validateBeatSource: (studioBeat: MulmoStudioContext["studio"]["beats"][number], index: number) => string;
+ export declare const addSplitAndExtractFrames: (ffmpegContext: FfmpegContext, videoId: string, duration: number, isMovie: boolean, needFirst: boolean, needLast: boolean, canvasInfo: {
+ width: number;
+ height: number;
+ }) => void;
+ export declare const createVideo: (audioArtifactFilePath: string, outputVideoPath: string, context: MulmoStudioContext, isTest?: boolean) => Promise<boolean | string[]>;
  export declare const movieFilePath: (context: MulmoStudioContext) => string;
  export declare const movie: (context: MulmoStudioContext) => Promise<boolean>;
+ export {};
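
The new exports above widen the transition surface from two types to seventeen and expose the frame-extraction helpers for testing. For orientation, a beat-level transition is declared through movieParams, since getMovieTransition (later in this diff) reads beat.movieParams?.transition before falling back to the presentation style. This is a minimal sketch; the surrounding beat fields are illustrative only:

    // Hypothetical MulmoScript beat using a per-beat transition (sketch):
    const beat = {
      text: "Second beat", // illustrative field
      movieParams: {
        transition: { type: "slidein_left", duration: 0.5 }, // one of the 17 types in this release
      },
    };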

package/lib/actions/movie.js

@@ -1,5 +1,5 @@
  import { GraphAILogger, assert } from "graphai";
- import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
+ import { mulmoFillOptionSchema } from "../types/index.js";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage, isFile } from "../utils/file.js";
  import { createVideoFileError, createVideoSourceError } from "../utils/error_cause.js";
@@ -7,27 +7,27 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAud
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
- export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption, speed) => {
+ export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed) => {
  const videoId = `v${inputIndex}`;
  const videoFilters = [];
  // Handle different media types
  const originalDuration = duration * speed;
- if (mediaType === "image") {
- videoFilters.push("loop=loop=-1:size=1:start=0");
- }
- else if (mediaType === "movie") {
+ if (isMovie) {
  // For videos, extend with last frame if shorter than required duration
  // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
  videoFilters.push(`tpad=stop_mode=clone:stop_duration=${originalDuration * 2}`); // Use 2x duration to ensure coverage
  }
+ else {
+ videoFilters.push("loop=loop=-1:size=1:start=0");
+ }
  // Common filters for all media types
  videoFilters.push(`trim=duration=${originalDuration}`, "fps=30");
  // Apply speed if specified
- if (speed !== 1.0) {
- videoFilters.push(`setpts=${1 / speed}*PTS`);
+ if (speed === 1.0) {
+ videoFilters.push("setpts=PTS-STARTPTS");
  }
  else {
- videoFilters.push("setpts=PTS-STARTPTS");
+ videoFilters.push(`setpts=${1 / speed}*PTS`);
  }
  // Apply scaling based on fill option
  if (fillOption.style === "aspectFill") {
@@ -43,7 +43,7 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOp
  videoFilters.push("setsar=1", "format=yuv420p");
  return {
  videoId,
- videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
+ videoPart: `[${inputIndex}:v]` + videoFilters.join(",") + `[${videoId}]`,
  };
  };
  export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
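
To make the rewritten branches concrete: for an image beat (isMovie = false) with duration 3 and speed 1.0, the chain assembled above, before the fill-option scaling filters that this hunk elides, would read as sketched here (derived from the code, not captured output):

    // [0:v]loop=loop=-1:size=1:start=0,trim=duration=3,fps=30,setpts=PTS-STARTPTS,...[v0]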
@@ -82,47 +82,138 @@ const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
  const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
  if (caption && beatsWithCaptions.length > 0) {
  const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
- return beatsWithCaptions.reduce((acc, beat, index) => {
+ return beatsWithCaptions.reduce((prevVideoId, beat, index) => {
  const { startAt, duration, captionFile } = beat;
  if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
  const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
  const compositeVideoId = `oc${index}`;
- ffmpegContext.filterComplex.push(`[${acc}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
  return compositeVideoId;
  }
- return acc;
+ return prevVideoId;
  }, concatVideoId);
  }
  return concatVideoId;
  };
- const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps) => {
- if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
- const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
- return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
- const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
- const processedVideoId = `${transitionVideoId}_f`;
- let transitionFilter;
- if (transition.type === "fade") {
- transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
- }
- else if (transition.type === "slideout_left") {
- transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
- }
- else {
- throw new Error(`Unknown transition type: ${transition.type}`);
- }
- ffmpegContext.filterComplex.push(transitionFilter);
- const outputId = `${transitionVideoId}_o`;
- if (transition.type === "fade") {
- ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ export const getOutOverlayCoords = (transitionType, d, t) => {
+ if (transitionType === "slideout_left") {
+ return `x='-(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slideout_right") {
+ return `x='(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slideout_up") {
+ return `x=0:y='-(t-${t})*H/${d}'`;
+ }
+ else if (transitionType === "slideout_down") {
+ return `x=0:y='(t-${t})*H/${d}'`;
+ }
+ throw new Error(`Unknown transition type: ${transitionType}`);
+ };
+ export const getInOverlayCoords = (transitionType, d, t) => {
+ if (transitionType === "slidein_left") {
+ return `x='-W+(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slidein_right") {
+ return `x='W-(t-${t})*W/${d}':y=0`;
+ }
+ else if (transitionType === "slidein_up") {
+ return `x=0:y='H-(t-${t})*H/${d}'`;
+ }
+ else if (transitionType === "slidein_down") {
+ return `x=0:y='-H+(t-${t})*H/${d}'`;
+ }
+ throw new Error(`Unknown transition type: ${transitionType}`);
+ };
+ const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats) => {
+ if (transitionVideoIds.length === 0) {
+ return captionedVideoId;
+ }
+ return transitionVideoIds.reduce((prevVideoId, { videoId: transitionVideoId, nextVideoId, beatIndex }) => {
+ const beat = context.studio.script.beats[beatIndex];
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ if (!transition) {
+ return prevVideoId; // Skip if no transition is defined
+ }
+ // Transition happens at the start of this beat
+ const startAt = beatTimestamps[beatIndex] - 0.05; // 0.05 is to avoid flickering
+ const duration = transition.duration;
+ const outputVideoId = `trans_${beatIndex}_o`;
+ const processedVideoId = `${transitionVideoId}_f`;
+ if (transition.type === "fade") {
+ // Fade out the previous beat's last frame
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${duration}:alpha=1,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("slideout_")) {
+ // Slideout: previous beat's last frame slides out
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${processedVideoId}]`);
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${processedVideoId}]overlay=${getOutOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("slidein_")) {
+ // Slidein: this beat's first frame slides in over the previous beat's last frame
+ if (!nextVideoId) {
+ // Cannot apply slidein without first frame
+ return prevVideoId;
  }
- else if (transition.type === "slideout_left") {
- ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ // Get previous beat's last frame for background
+ const prevVideoSourceId = videoIdsForBeats[beatIndex - 1];
+ // Both movie and image beats now have _last
+ const prevLastFrame = `${prevVideoSourceId}_last`;
+ // Prepare background (last frame of previous beat)
+ const backgroundVideoId = `${prevLastFrame}_bg`;
+ ffmpegContext.filterComplex.push(`[${prevLastFrame}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${backgroundVideoId}]`);
+ // Prepare sliding frame (first frame of this beat)
+ const slideinFrameId = `${nextVideoId}_f`;
+ ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${startAt}/TB[${slideinFrameId}]`);
+ // First overlay: put background on top of concat video
+ const bgOutputId = `${prevLastFrame}_bg_o`;
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${backgroundVideoId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${bgOutputId}]`);
+ // Second overlay: slide in the new frame on top of background
+ ffmpegContext.filterComplex.push(`[${bgOutputId}][${slideinFrameId}]overlay=${getInOverlayCoords(transition.type, duration, startAt)}:enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else if (transition.type.startsWith("wipe")) {
+ // Wipe transition: use xfade filter between previous beat's last frame and this beat's first frame
+ if (!nextVideoId) {
+ // Cannot apply wipe without first frame
+ return prevVideoId;
  }
- return outputId;
- }, captionedVideoId);
- }
- return captionedVideoId;
+ // Use xfade offset instead of trimming to avoid framerate issues
+ // The static frames are created with proper duration, use offset to start transition at the right time
+ const prevBeatDuration = context.studio.beats[beatIndex - 1].duration ?? 0;
+ const xfadeOffset = prevBeatDuration - duration;
+ // Apply xfade with explicit pixel format
+ const xfadeOutputId = `${transitionVideoId}_xfade`;
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuv420p[${transitionVideoId}_fmt]`);
+ ffmpegContext.filterComplex.push(`[${nextVideoId}]format=yuv420p[${nextVideoId}_fmt]`);
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}_fmt][${nextVideoId}_fmt]xfade=transition=${transition.type}:duration=${duration}:offset=${xfadeOffset}[${xfadeOutputId}]`);
+ // Set PTS for overlay timing
+ const xfadeTimedId = `${xfadeOutputId}_t`;
+ ffmpegContext.filterComplex.push(`[${xfadeOutputId}]setpts=PTS-STARTPTS+${startAt}/TB[${xfadeTimedId}]`);
+ // Overlay the xfade result on the concat video
+ ffmpegContext.filterComplex.push(`[${prevVideoId}][${xfadeTimedId}]overlay=enable='between(t,${startAt},${startAt + duration})'[${outputVideoId}]`);
+ }
+ else {
+ throw new Error(`Unknown transition type: ${transition.type}`);
+ }
+ return outputVideoId;
+ }, captionedVideoId);
+ };
+ export const getNeedFirstFrame = (context) => {
+ return context.studio.script.beats.map((beat, index) => {
+ if (index === 0)
+ return false; // First beat cannot have transition
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ return (transition?.type.startsWith("slidein_") || transition?.type.startsWith("wipe")) ?? false;
+ });
+ };
+ export const getNeedLastFrame = (context) => {
+ return context.studio.script.beats.map((beat, index) => {
+ if (index === context.studio.script.beats.length - 1)
+ return false; // Last beat doesn't need _last
+ const nextTransition = MulmoPresentationStyleMethods.getMovieTransition(context, context.studio.script.beats[index + 1]);
+ return nextTransition !== null; // Any transition on next beat requires this beat's last frame
+ });
  };
  const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
  if (audioIdsFromMovieBeats.length > 0) {
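
As a worked example of the coordinate helpers, derived directly from their template strings: getOutOverlayCoords("slideout_left", 0.5, 12) yields

    x='-(t-12)*W/0.5':y=0

so between t=12 and t=12.5 the overlaid frame moves from x=0 to x=-W, sliding off the left edge, while getInOverlayCoords("slidein_left", 0.5, 12) yields x='-W+(t-12)*W/0.5':y=0, carrying the incoming frame from -W to 0 over the same window.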
@@ -135,17 +226,96 @@ const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMov
  }
  return artifactAudioId;
  };
- const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
- const caption = MulmoStudioContextMethods.getCaption(context);
- const start = performance.now();
- const ffmpegContext = FfmpegContextInit();
- const missingIndex = context.studio.beats.findIndex((studioBeat, index) => {
+ export const getExtraPadding = (context, index) => {
+ // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
+ if (index === 0) {
+ return MulmoStudioContextMethods.getIntroPadding(context);
+ }
+ else if (index === context.studio.beats.length - 1) {
+ return context.presentationStyle.audioParams.outroPadding;
+ }
+ return 0;
+ };
+ export const getFillOption = (context, beat) => {
+ // Get fillOption from merged imageParams (global + beat-specific)
+ const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+ const beatFillOption = beat.movieParams?.fillOption;
+ const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+ return { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ };
+ export const getTransitionVideoId = (transition, videoIdsForBeats, index) => {
+ if (transition.type === "fade" || transition.type.startsWith("slideout_")) {
+ // Use previous beat's last frame. TODO: support voice-over
+ const prevVideoSourceId = videoIdsForBeats[index - 1];
+ // Both movie and image beats now have _last
+ const frameId = `${prevVideoSourceId}_last`;
+ return { videoId: frameId, nextVideoId: undefined, beatIndex: index };
+ }
+ if (transition.type.startsWith("wipe")) {
+ // Wipe needs both previous beat's last frame and this beat's first frame
+ const prevVideoSourceId = videoIdsForBeats[index - 1];
+ const prevLastFrame = `${prevVideoSourceId}_last`;
+ const nextFirstFrame = `${videoIdsForBeats[index]}_first`;
+ return { videoId: prevLastFrame, nextVideoId: nextFirstFrame, beatIndex: index };
+ }
+ // Use this beat's first frame. slidein_ case
+ return { videoId: "", nextVideoId: `${videoIdsForBeats[index]}_first`, beatIndex: index };
+ };
+ export const getConcatVideoFilter = (concatVideoId, videoIdsForBeats) => {
+ const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
+ return `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
+ };
+ export const validateBeatSource = (studioBeat, index) => {
+ const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
+ assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
+ assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
+ assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
+ return sourceFile;
+ };
+ export const addSplitAndExtractFrames = (ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo) => {
+ const outputs = [`[${videoId}]`];
+ if (needFirst)
+ outputs.push(`[${videoId}_first_src]`);
+ if (needLast)
+ outputs.push(`[${videoId}_last_src]`);
+ ffmpegContext.filterComplex.push(`[${videoId}]split=${outputs.length}${outputs.join("")}`);
+ if (needFirst) {
+ // Create static frame using nullsrc as base for proper framerate/timebase
+ // Note: setpts must NOT be used here as it loses framerate metadata needed by xfade
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_first_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_first_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_first_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_first_null][${videoId}_first_frame]overlay=format=auto,fps=30[${videoId}_first]`);
+ }
+ if (needLast) {
+ if (isMovie) {
+ // Movie beats: extract actual last frame
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_src]reverse,select='eq(n,0)',reverse,scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+ }
+ else {
+ // Image beats: all frames are identical, so just select one
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${duration}:rate=30[${videoId}_last_null]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
+ }
+ }
+ };
+ const findMissingIndex = (context) => {
+ return context.studio.beats.findIndex((studioBeat, index) => {
  const beat = context.studio.script.beats[index];
  if (beat.image?.type === "voice_over") {
  return false; // Voice-over does not have either imageFile or movieFile.
  }
  return !studioBeat.imageFile && !studioBeat.movieFile;
  });
+ };
+ export const createVideo = async (audioArtifactFilePath, outputVideoPath, context, isTest = false) => {
+ const caption = MulmoStudioContextMethods.getCaption(context);
+ const start = performance.now();
+ const ffmpegContext = FfmpegContextInit();
+ const missingIndex = findMissingIndex(context);
  if (missingIndex !== -1) {
  GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
  return false;
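
For illustration, getConcatVideoFilter drops the undefined slots left by voice-over beats, so videoIdsForBeats = ["v0", "v1", undefined, "v3"] produces (a derived example, not output captured from the package):

    [v0][v1][v3]concat=n=3:v=1:a=0[concat_video]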
@@ -156,6 +326,10 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  const audioIdsFromMovieBeats = [];
  const transitionVideoIds = [];
  const beatTimestamps = [];
+ // Check which beats need _first (for slidein transition on this beat)
+ const needsFirstFrame = getNeedFirstFrame(context);
+ // Check which beats need _last (for any transition on next beat - they all need previous beat's last frame)
+ const needsLastFrame = getNeedLastFrame(context);
  context.studio.beats.reduce((timestamp, studioBeat, index) => {
  const beat = context.studio.script.beats[index];
  if (beat.image?.type === "voice_over") {
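
To see how these two arrays line up, take a hypothetical three-beat script where beat 1 declares a fade and beat 2 a slidein_left:

    // getNeedFirstFrame(context) -> [false, false, true]  // only the slidein beat needs its own first frame
    // getNeedLastFrame(context)  -> [true, true, false]   // beats 0 and 1 each precede a transition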
@@ -163,47 +337,28 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  beatTimestamps.push(timestamp);
  return timestamp; // Skip voice-over beats.
  }
- const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.htmlImageFile ?? studioBeat.imageFile;
- assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`, false, createVideoSourceError(index));
- assert(isFile(sourceFile), `studioBeat.imageFile or studioBeat.movieFile is not exist or not file: index=${index} file=${sourceFile}`, false, createVideoFileError(index, sourceFile));
- assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
- const extraPadding = (() => {
- // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
- if (index === 0) {
- return MulmoStudioContextMethods.getIntroPadding(context);
- }
- else if (index === context.studio.beats.length - 1) {
- return context.presentationStyle.audioParams.outroPadding;
- }
- return 0;
- })();
+ const sourceFile = isTest ? "/test/dummy.mp4" : validateBeatSource(studioBeat, index);
  // The movie duration is bigger in case of voice-over.
- const duration = Math.max(studioBeat.duration + extraPadding, studioBeat.movieDuration ?? 0);
- // Get fillOption from merged imageParams (global + beat-specific)
- const globalFillOption = context.presentationStyle.movieParams?.fillOption;
- const beatFillOption = beat.movieParams?.fillOption;
- const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
- const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ const duration = Math.max(studioBeat.duration + getExtraPadding(context, index), studioBeat.movieDuration ?? 0);
  const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
- const mediaType = studioBeat.lipSyncFile || studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
+ const isMovie = !!(studioBeat.lipSyncFile ||
+ studioBeat.movieFile ||
+ MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat) === "movie");
  const speed = beat.movieParams?.speed ?? 1.0;
- const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
+ const { videoId, videoPart } = getVideoPart(inputIndex, isMovie, duration, canvasInfo, getFillOption(context, beat), speed);
  ffmpegContext.filterComplex.push(videoPart);
- if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
- // NOTE: We split the video into two parts for transition.
- ffmpegContext.filterComplex.push(`[${videoId}]split=2[${videoId}_0][${videoId}_1]`);
- videoIdsForBeats.push(`${videoId}_0`);
- if (mediaType === "movie") {
- // For movie beats, extract the last frame for transition
- ffmpegContext.filterComplex.push(`[${videoId}_1]reverse,select='eq(n,0)',reverse,tpad=stop_mode=clone:stop_duration=${duration},fps=30,setpts=PTS-STARTPTS[${videoId}_2]`);
- transitionVideoIds.push(`${videoId}_2`);
- }
- else {
- transitionVideoIds.push(`${videoId}_1`);
- }
+ // for transition
+ const needFirst = needsFirstFrame[index]; // This beat has slidein
+ const needLast = needsLastFrame[index]; // Next beat has transition
+ videoIdsForBeats.push(videoId);
+ if (needFirst || needLast) {
+ addSplitAndExtractFrames(ffmpegContext, videoId, duration, isMovie, needFirst, needLast, canvasInfo);
  }
- else {
- videoIdsForBeats.push(videoId);
+ // Record transition info if this beat has a transition
+ const transition = MulmoPresentationStyleMethods.getMovieTransition(context, beat);
+ if (transition && index > 0) {
+ const transitionVideoId = getTransitionVideoId(transition, videoIdsForBeats, index);
+ transitionVideoIds.push(transitionVideoId);
  }
  // NOTE: We don't support audio if the speed is not 1.0.
  const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
@@ -218,23 +373,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  }, 0);
  assert(videoIdsForBeats.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
  assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
- // console.log("*** images", images.audioIds);
  // Concatenate the trimmed images
  const concatVideoId = "concat_video";
- const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
- const inputs = videoIds.map((id) => `[${id}]`).join("");
- const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
- ffmpegContext.filterComplex.push(filter);
+ ffmpegContext.filterComplex.push(getConcatVideoFilter(concatVideoId, videoIdsForBeats));
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
- const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
+ const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps, videoIdsForBeats);
+ if (isTest) {
+ return ffmpegContext.filterComplex;
+ }
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
- const artifactAudioId = `${audioIndex}:a`;
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, artifactAudioId, audioIdsFromMovieBeats);
- // GraphAILogger.debug("filterComplex", ffmpegContext.filterComplex);
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
- const end = performance.now();
- GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
+ const endTime = performance.now();
+ GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
  GraphAILogger.info(context.studio.script.title);
  GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
  return true;
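
The isTest path above returns the accumulated filter_complex entries without touching ffmpeg, which is presumably what the new scripts/test fixtures exercise. A minimal sketch of that usage, with hypothetical paths and an assumed prebuilt context:

    // Returns string[] of filter_complex lines instead of rendering:
    const filters = await createVideo("/tmp/audio.m4a", "/tmp/out.mp4", context, true);
    console.log(Array.isArray(filters) ? filters.join("\n") : filters);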

package/lib/methods/mulmo_presentation_style.d.ts

@@ -3,13 +3,14 @@
  * (No Node.js built-ins like fs, path, dotenv, etc.)
  * Works in both Node.js and modern browsers.
  */
- import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
+ import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext, MulmoTransition } from "../types/index.js";
  export declare const MulmoPresentationStyleMethods: {
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
  getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
  getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
  getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
  getSpeaker(context: MulmoStudioContext, beat: MulmoBeat, targetLang: string | undefined): SpeakerData;
+ getMovieTransition(context: MulmoStudioContext, beat: MulmoBeat): MulmoTransition | null;
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
  getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -20,11 +21,11 @@ export declare const MulmoPresentationStyleMethods: {
  fillOption?: {
  style: "aspectFit" | "aspectFill";
  } | undefined;
- speed?: number | undefined;
  transition?: {
- type: "fade" | "slideout_left";
+ type: "fade" | "slideout_left" | "slideout_right" | "slideout_up" | "slideout_down" | "slidein_left" | "slidein_right" | "slidein_up" | "slidein_down" | "wipeleft" | "wiperight" | "wipeup" | "wipedown" | "wipetl" | "wipetr" | "wipebl" | "wipebr";
  duration: number;
  } | undefined;
+ speed?: number | undefined;
  };
  keyName: string;
  };

package/lib/methods/mulmo_presentation_style.js

@@ -5,7 +5,7 @@
  */
  import { isNull } from "graphai";
  import { userAssert } from "../utils/utils.js";
- import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
+ import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
  import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../utils/provider2agent.js";
  const defaultTextSlideStyles = [
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
@@ -63,6 +63,12 @@ export const MulmoPresentationStyleMethods = {
  }
  return speaker;
  },
+ getMovieTransition(context, beat) {
+ const transitionData = beat.movieParams?.transition ?? context.presentationStyle.movieParams?.transition;
+ if (!transitionData)
+ return null;
+ return mulmoTransitionSchema.parse(transitionData);
+ },
  /* NOTE: This method is not used.
  getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
  const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
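
The nullish-coalescing lookup in getMovieTransition means a beat-level transition always wins over the presentation-style default, with the schema parse validating the type and duration. A small derived sketch (object shapes abbreviated):

    // beat:  { movieParams: { transition: { type: "fade", duration: 0.3 } } }
    // style: { movieParams: { transition: { type: "wipeleft", duration: 1 } } }
    // getMovieTransition(context, beat) -> { type: "fade", duration: 0.3 }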