npm - mulmocast - Versions diffs - 2.6.5 → 2.6.6 - Mend

mulmocast 2.6.5 → 2.6.6

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (37) hide show

package/lib/actions/graph_option.d.ts +3 -0
package/lib/actions/graph_option.js +18 -0
package/lib/actions/image_references.js +1 -1
package/lib/actions/images.d.ts +2 -3
package/lib/actions/images.js +4 -17
package/lib/actions/movie.d.ts +3 -0
package/lib/actions/movie.js +38 -5
package/lib/agents/add_bgm_agent.d.ts +10 -0
package/lib/agents/add_bgm_agent.js +26 -4
package/lib/agents/movie_genai_agent.js +1 -1
package/lib/agents/movie_replicate_agent.js +29 -5
package/lib/types/provider2agent.d.ts +2 -0
package/lib/types/provider2agent.js +78 -5
package/lib/types/schema.d.ts +34 -4
package/lib/types/schema.js +9 -1
package/lib/utils/context.d.ts +17 -2
package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
package/lib/utils/image_plugins/html_tailwind.js +56 -5
package/package.json +8 -7
package/scripts/test/fixtures/movie_tone_high.mov +0 -0
package/scripts/test/fixtures/movie_tone_low.mov +0 -0
package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
package/scripts/test/test_audio_mix.json +91 -0
package/scripts/test/test_audio_mix_beat_vol.json +100 -0
package/scripts/test/test_audio_mix_ducking.json +91 -0
package/scripts/test/test_audio_mix_legacy.json +90 -0
package/scripts/test/test_grok.json +57 -0
package/scripts/test/test_image_references.json +74 -0
package/scripts/test/test_kling_v3.json +54 -0
package/scripts/test/test_kling_v3_omni.json +54 -0
package/scripts/test/test_lipsync2.json +48 -52
package/scripts/test/test_lipsync5.json +66 -0
package/scripts/test/test_runway.json +54 -0
package/scripts/test/test_threejs.json +241 -0
package/scripts/test/test_threejs_glb.json +154 -0
package/scripts/test/test_veo31_lite.json +39 -0

package/lib/actions/graph_option.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { GraphOptions } from "graphai";
+import { MulmoStudioContext } from "../types/index.js";
+export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;

package/lib/actions/graph_option.js ADDED Viewed

@@ -0,0 +1,18 @@
+import { TaskManager } from "graphai";
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
+import { fileCacheAgentFilter } from "../utils/filters.js";
+import { settings2GraphAIConfig } from "../utils/utils.js";
+export const graphOption = async (context, settings) => {
+    const options = {
+        agentFilters: [
+            {
+                name: "fileCacheAgentFilter",
+                agent: fileCacheAgentFilter,
+                nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
+            },
+        ],
+        taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
+        config: settings2GraphAIConfig(settings, process.env),
+    };
+    return options;
+};

package/lib/actions/image_references.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { GraphAI, GraphAILogger } from "graphai";
 import { getReferenceImagePath } from "../utils/file.js";
-import { graphOption } from "./images.js";
+import { graphOption } from "./graph_option.js";
 import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
 import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
 import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";

package/lib/actions/images.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { GraphOptions, GraphData } from "graphai";
+import type { GraphData } from "graphai";
 import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
 export declare const beat_graph_data: {
     version: number;
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
     };
 };
 export declare const images_graph_data: GraphData;
-export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
+export { graphOption } from "./graph_option.js";
 type ImageOptions = {
     imageAgents: Record<string, unknown>;
 };
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
         withBackup?: boolean;
     };
 }) => Promise<void>;
-export {};

package/lib/actions/images.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import dotenv from "dotenv";
 import fs from "fs";
-import { GraphAI, GraphAILogger, TaskManager } from "graphai";
+import { GraphAI, GraphAILogger } from "graphai";
 import { AuthenticationError, RateLimitError } from "openai/index.js";
 import * as vanilla from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
 import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
-import { fileCacheAgentFilter } from "../utils/filters.js";
-import { settings2GraphAIConfig } from "../utils/utils.js";
 import { audioCheckerError } from "../utils/error_cause.js";
 import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
 import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
 import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
+import { graphOption } from "./graph_option.js";
 const vanillaAgents = vanilla.default ?? vanilla;
 const imageAgents = {
     imageGenAIAgent,
@@ -432,20 +431,8 @@ export const images_graph_data = {
         },
     },
 };
-export const graphOption = async (context, settings) => {
-    const options = {
-        agentFilters: [
-            {
-                name: "fileCacheAgentFilter",
-                agent: fileCacheAgentFilter,
-                nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
-            },
-        ],
-        taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
-        config: settings2GraphAIConfig(settings, process.env),
-    };
-    return options;
-};
+// graphOption moved to graph_option.ts to break circular dependency with image_references.ts
+export { graphOption } from "./graph_option.js";
 const prepareGenerateImages = async (context) => {
     const fileName = MulmoStudioContextMethods.getFileName(context);
     const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);

package/lib/actions/movie.d.ts CHANGED Viewed

@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
 export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
 export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
 export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
+export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
+export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
+export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
 export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
 export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
     style: "aspectFit" | "aspectFill";

package/lib/actions/movie.js CHANGED Viewed

@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
 const VIDEO_FPS = 30;
+const DEFAULT_DUCKING_RATIO = 0.3;
 export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
     const videoId = `v${inputIndex}`;
     const videoFilters = [];
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
         return nextTransition !== null; // Any transition on next beat requires this beat's last frame
     });
 };
-const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
+export const resolveMovieVolume = (beat, context) => {
+    const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
+    const ducking = context.presentationStyle.audioParams.ducking;
+    const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
+    if (ducking && hasSpeech) {
+        const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
+        return baseMovieVolume * ratio;
+    }
+    return baseMovieVolume;
+};
+export const isExplicitMixMode = (context) => {
+    const audioParams = context.presentationStyle.audioParams;
+    const duckingRequested = audioParams.ducking !== undefined;
+    const speechSuppressed = audioParams.suppressSpeech === true;
+    const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
+    const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
+    return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
+};
+export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
     if (audioIdsFromMovieBeats.length > 0) {
         const mainAudioId = "mainaudio";
         const compositeAudioId = "composite";
         const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
-        FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
-        ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
+        const useExplicitMix = isExplicitMixMode(context);
+        if (useExplicitMix) {
+            // Explicit mode: normalize=0 + limiter.
+            // ttsVolume is applied in addBGMAgent to avoid changing BGM level.
+            // Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
+            const mixedId = "mixed";
+            FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
+            ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
+            // Limiter as failsafe
+            ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
+        }
+        else {
+            // Legacy mode: normalize=1 (current behavior, fully backward compatible)
+            FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
+            ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
+        }
         return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
     }
     return artifactAudioId;
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
             transitionVideoIds.push(transitionVideoId);
         }
         // NOTE: We don't support audio if the speed is not 1.0.
-        const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
+        const movieVolume = resolveMovieVolume(beat, context);
         if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
             // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
             const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
     }
     GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
     const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
-    const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
+    const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
     await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
     const endTime = performance.now();
     GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);

package/lib/agents/add_bgm_agent.d.ts CHANGED Viewed

@@ -1,3 +1,13 @@
 import type { AgentFunctionInfo } from "graphai";
+import { MulmoStudioContext } from "../types/index.js";
+export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
+    useExplicitMix: boolean;
+    voiceVolume: number;
+};
+export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
+    amixNormalize: string;
+    mixedOutputId: string;
+    limiterFilter: string | undefined;
+};
 declare const addBGMAgentInfo: AgentFunctionInfo;
 export default addBGMAgentInfo;

package/lib/agents/add_bgm_agent.js CHANGED Viewed

@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { isFile } from "../utils/file.js";
 import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
+export const resolveAddBgmMixParams = (audioParams) => {
+    const useExplicitMix = audioParams.ttsVolume !== undefined;
+    const ttsVolume = audioParams.ttsVolume ?? 1.0;
+    return {
+        useExplicitMix,
+        voiceVolume: audioParams.audioVolume * ttsVolume,
+    };
+};
+export const resolveAddBgmFilterConfig = (useExplicitMix) => {
+    const amixNormalize = useExplicitMix ? ":normalize=0" : "";
+    return {
+        amixNormalize,
+        mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
+        limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
+    };
+};
 const addBGMAgent = async ({ namedInputs, params, }) => {
     const { voiceFile, outputFile, context } = namedInputs;
     const { musicFile } = params;
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
     const ffmpegContext = FfmpegContextInit();
     const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
     const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
-    ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
-    ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
-    ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
-    ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
+    const audioParams = context.presentationStyle.audioParams;
+    const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
+    ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
+    ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
+    const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
+    ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
+    if (limiterFilter) {
+        ffmpegContext.filterComplex.push(limiterFilter);
+    }
+    ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
     ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
     try {
         await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);

package/lib/agents/movie_genai_agent.js CHANGED Viewed

@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
         model,
         prompt,
         config: {
-            durationSeconds: capabilities?.supportsPersonGeneration === false ? undefined : duration,
+            durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
             aspectRatio,
             personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
         },

package/lib/agents/movie_replicate_agent.js CHANGED Viewed

@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
 import Replicate from "replicate";
 import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
 import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
-async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration) {
+function replicate_get_videoUrl(output) {
+    if (typeof output === "string")
+        return output;
+    if (output && typeof output === "object" && "url" in output)
+        return output.url();
+    return undefined;
+}
+async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
     const replicate = new Replicate({
         auth: apiKey,
     });
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
             input.image = base64Image;
         }
     }
+    // Add reference images if provided and model supports it
+    const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
+    if (referenceImages && referenceImages.length > 0) {
+        if (!referenceImagesParam) {
+            GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
+        }
+        else if (imagePath) {
+            GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
+        }
+        else {
+            input[referenceImagesParam] = referenceImages.map((ref) => {
+                const buffer = readFileSync(ref.imagePath);
+                return `data:image/png;base64,${buffer.toString("base64")}`;
+            });
+        }
+    }
     // Add last frame image if provided and model supports it
     if (lastFrameImagePath) {
         const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
     try {
         const output = await replicate.run(model, { input });
         // Download the generated video
-        if (output && typeof output === "object" && "url" in output) {
-            const videoUrl = output.url();
+        // Some models return a FileOutput object with a url() method; others return a plain string URL.
+        const videoUrl = replicate_get_videoUrl(output);
+        if (videoUrl) {
             const videoResponse = await fetch(videoUrl);
             if (!videoResponse.ok) {
                 throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
     return "9:16";
 };
 export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
-    const { prompt, imagePath, lastFrameImagePath } = namedInputs;
+    const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
     const aspectRatio = getAspectRatio(params.canvasSize);
     const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
     if (!provider2MovieAgent.replicate.modelParams[model]) {
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
         });
     }
     try {
-        const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
+        const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
         if (buffer) {
             return { buffer };
         }

package/lib/types/provider2agent.d.ts CHANGED Viewed

@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
             durations: number[];
             start_image: string | undefined;
             last_image?: string;
+            reference_images_param?: string;
             price_per_sec: number;
         }>;
     };
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
         keyName: string;
         modelParams: Record<string, {
             durations: number[];
+            supportsDuration: boolean;
             supportsLastFrame: boolean;
             supportsReferenceImages: boolean;
             supportsPersonGeneration: boolean;

package/lib/types/provider2agent.js CHANGED Viewed

@@ -92,6 +92,9 @@ export const provider2MovieAgent = {
             "kwaivgi/kling-v2.1-master",
             "google/veo-2",
             "google/veo-3",
+            "google/veo-3.1",
+            "google/veo-3.1-fast",
+            "google/veo-3.1-lite",
             "google/veo-3-fast",
             "minimax/video-01",
             "minimax/hailuo-02",
@@ -99,6 +102,11 @@ export const provider2MovieAgent = {
             "pixverse/pixverse-v4.5",
             "wan-video/wan-2.2-i2v-fast",
             "wan-video/wan-2.2-t2v-fast",
+            "xai/grok-imagine-video",
+            "xai/grok-imagine-r2v",
+            "runwayml/gen-4.5",
+            "kwaivgi/kling-v3-omni-video",
+            "kwaivgi/kling-v3-video",
         ],
         modelParams: {
             "bytedance/seedance-1-lite": {
@@ -138,6 +146,25 @@ export const provider2MovieAgent = {
                 start_image: "image",
                 price_per_sec: 0.75,
             },
+            "google/veo-3.1": {
+                durations: [4, 6, 8],
+                start_image: "image",
+                last_image: "last_frame_image",
+                reference_images_param: "reference_images",
+                price_per_sec: 0.75,
+            },
+            "google/veo-3.1-fast": {
+                durations: [4, 6, 8],
+                start_image: "image",
+                last_image: "last_frame_image",
+                price_per_sec: 0.4,
+            },
+            "google/veo-3.1-lite": {
+                durations: [4, 6, 8],
+                start_image: "image",
+                last_image: "last_frame",
+                price_per_sec: 0.05,
+            },
             "google/veo-3-fast": {
                 durations: [8],
                 start_image: "image",
@@ -175,28 +202,68 @@ export const provider2MovieAgent = {
                 start_image: undefined,
                 price_per_sec: 0.012,
             },
+            "xai/grok-imagine-video": {
+                durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "image",
+                price_per_sec: 0.08,
+            },
+            "xai/grok-imagine-r2v": {
+                durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                start_image: undefined,
+                reference_images_param: "reference_images",
+                price_per_sec: 0.08,
+            },
+            "runwayml/gen-4.5": {
+                durations: [5, 10],
+                start_image: "image",
+                price_per_sec: 0.25,
+            },
+            "kwaivgi/kling-v3-omni-video": {
+                durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "start_image",
+                last_image: "end_image",
+                reference_images_param: "reference_images",
+                price_per_sec: 0.3,
+            },
+            "kwaivgi/kling-v3-video": {
+                durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "start_image",
+                last_image: "end_image",
+                reference_images_param: "reference_images",
+                price_per_sec: 0.3,
+            },
         },
     },
     google: {
         agentName: "movieGenAIAgent",
         defaultModel: "veo-2.0-generate-001",
-        models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
+        models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-lite-generate-preview"],
         keyName: "GEMINI_API_KEY",
         modelParams: {
+            "veo-3.1-lite-generate-preview": {
+                durations: [4, 6, 8],
+                supportsDuration: true,
+                supportsLastFrame: true,
+                supportsReferenceImages: false,
+                supportsPersonGeneration: false,
+            },
             "veo-3.1-generate-preview": {
                 durations: [4, 6, 8],
+                supportsDuration: true,
                 supportsLastFrame: true,
                 supportsReferenceImages: true,
                 supportsPersonGeneration: false,
             },
             "veo-3.0-generate-001": {
-                durations: [4, 6, 8],
+                durations: [8],
+                supportsDuration: false, // Veo 3.0 always generates 8s
                 supportsLastFrame: false,
                 supportsReferenceImages: false,
                 supportsPersonGeneration: false,
             },
             "veo-2.0-generate-001": {
-                durations: [5, 6, 7, 8],
+                durations: [5, 6, 8],
+                supportsDuration: true,
                 supportsLastFrame: false, // Vertex AI only
                 supportsReferenceImages: false,
                 supportsPersonGeneration: true,
@@ -229,7 +296,7 @@ export const provider2LipSyncAgent = {
         agentName: "lipSyncReplicateAgent",
         defaultModel: "bytedance/omni-human",
         keyName: "REPLICATE_API_TOKEN",
-        models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
+        models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human", "pixverse/lipsync"],
         modelParams: {
             "bytedance/latentsync": {
                 identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
@@ -247,14 +314,20 @@ export const provider2LipSyncAgent = {
                 audio: "audio",
                 price_per_sec: 0.14,
             },
+            "pixverse/lipsync": {
+                identifier: "pixverse/lipsync:3ca6d73f4fb9e1d77a4b6e14f8998ee18926e4dc462838e31fa2bb5e662c1e2c",
+                video: "video",
+                audio: "audio",
+            },
             /* NOTE: This model does not work with large base64 urls.
             "sync/lipsync-2": {
               video: "video",
               audio: "audio",
             },
             */
-            /* NOTE: This model does not work well for some unknown reason.
+            /* NOTE: This model does not work with base64 data URIs (error 1201).
             "kwaivgi/kling-lip-sync": {
+              identifier: "kwaivgi/kling-lip-sync:8311467f07043d4b3feb44584d2586bfa2fc70203eca612ed26f84d0b55df3ce",
               video: "video_url",
               audio: "audio_file",
             },

package/lib/types/schema.d.ts CHANGED Viewed

@@ -3370,7 +3370,7 @@ export declare const mulmoSlideParamsSchema: z.ZodObject<{
 }, z.core.$strict>;
 export declare const beatAudioParamsSchema: z.ZodObject<{
     padding: z.ZodOptional<z.ZodNumber>;
-    movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
+    movieVolume: z.ZodOptional<z.ZodNumber>;
 }, z.core.$strict>;
 export declare const mulmoHtmlImageParamsSchema: z.ZodObject<{
     model: z.ZodOptional<z.ZodString>;
@@ -3393,6 +3393,11 @@ export declare const audioParamsSchema: z.ZodObject<{
     bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
     audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
     suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+    movieVolume: z.ZodOptional<z.ZodNumber>;
+    ttsVolume: z.ZodOptional<z.ZodNumber>;
+    ducking: z.ZodOptional<z.ZodObject<{
+        ratio: z.ZodOptional<z.ZodNumber>;
+    }, z.core.$strip>>;
 }, z.core.$strict>;
 export declare const htmlPromptParamsSchema: z.ZodObject<{
     systemPrompt: z.ZodDefault<z.ZodOptional<z.ZodString>>;
@@ -6336,7 +6341,7 @@ export declare const mulmoBeatSchema: z.ZodObject<{
     }, z.core.$strict>>;
     audioParams: z.ZodOptional<z.ZodObject<{
         padding: z.ZodOptional<z.ZodNumber>;
-        movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
+        movieVolume: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strict>>;
     movieParams: z.ZodOptional<z.ZodObject<{
         provider: z.ZodOptional<z.ZodEnum<{
@@ -7181,6 +7186,11 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
         bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
         audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
         suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+        movieVolume: z.ZodOptional<z.ZodNumber>;
+        ttsVolume: z.ZodOptional<z.ZodNumber>;
+        ducking: z.ZodOptional<z.ZodObject<{
+            ratio: z.ZodOptional<z.ZodNumber>;
+        }, z.core.$strip>>;
     }, z.core.$strict>>;
 }, z.core.$strip>;
 export declare const mulmoReferenceSchema: z.ZodObject<{
@@ -7677,6 +7687,11 @@ export declare const mulmoScriptSchema: z.ZodObject<{
         bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
         audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
         suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+        movieVolume: z.ZodOptional<z.ZodNumber>;
+        ttsVolume: z.ZodOptional<z.ZodNumber>;
+        ducking: z.ZodOptional<z.ZodObject<{
+            ratio: z.ZodOptional<z.ZodNumber>;
+        }, z.core.$strip>>;
     }, z.core.$strict>>;
     title: z.ZodOptional<z.ZodString>;
     description: z.ZodOptional<z.ZodString>;
@@ -10345,7 +10360,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
         }, z.core.$strict>>;
         audioParams: z.ZodOptional<z.ZodObject<{
             padding: z.ZodOptional<z.ZodNumber>;
-            movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
+            movieVolume: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strict>>;
         movieParams: z.ZodOptional<z.ZodObject<{
             provider: z.ZodOptional<z.ZodEnum<{
@@ -11265,6 +11280,11 @@ export declare const mulmoStudioSchema: z.ZodObject<{
             bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
             audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
             suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+            movieVolume: z.ZodOptional<z.ZodNumber>;
+            ttsVolume: z.ZodOptional<z.ZodNumber>;
+            ducking: z.ZodOptional<z.ZodObject<{
+                ratio: z.ZodOptional<z.ZodNumber>;
+            }, z.core.$strip>>;
         }, z.core.$strict>>;
         title: z.ZodOptional<z.ZodString>;
         description: z.ZodOptional<z.ZodString>;
@@ -13933,7 +13953,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
             }, z.core.$strict>>;
             audioParams: z.ZodOptional<z.ZodObject<{
                 padding: z.ZodOptional<z.ZodNumber>;
-                movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
+                movieVolume: z.ZodOptional<z.ZodNumber>;
             }, z.core.$strict>>;
             movieParams: z.ZodOptional<z.ZodObject<{
                 provider: z.ZodOptional<z.ZodEnum<{
@@ -14789,6 +14809,11 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
             bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
             audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
             suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+            movieVolume: z.ZodOptional<z.ZodNumber>;
+            ttsVolume: z.ZodOptional<z.ZodNumber>;
+            ducking: z.ZodOptional<z.ZodObject<{
+                ratio: z.ZodOptional<z.ZodNumber>;
+            }, z.core.$strip>>;
         }, z.core.$strict>>;
     }, z.core.$strip>>;
 }, z.core.$strict>;
@@ -15279,6 +15304,11 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
             bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
             audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
             suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
+            movieVolume: z.ZodOptional<z.ZodNumber>;
+            ttsVolume: z.ZodOptional<z.ZodNumber>;
+            ducking: z.ZodOptional<z.ZodObject<{
+                ratio: z.ZodOptional<z.ZodNumber>;
+            }, z.core.$strip>>;
         }, z.core.$strict>>;
     }, z.core.$strip>>;
     filename: z.ZodString;

package/lib/types/schema.js CHANGED Viewed

@@ -409,7 +409,7 @@ export const mulmoSlideParamsSchema = z
 export const beatAudioParamsSchema = z
     .object({
     padding: z.number().optional().describe("Padding between beats"), // seconds
-    movieVolume: z.number().optional().default(1.0).describe("Audio volume of the imported or generated movie"),
+    movieVolume: z.number().min(0).max(1).optional().describe("Audio volume of the imported or generated movie"),
 })
     .strict();
 export const mulmoHtmlImageParamsSchema = z
@@ -428,6 +428,14 @@ export const audioParamsSchema = z
     bgmVolume: z.number().optional().default(0.2).describe("Volume of the background music"),
     audioVolume: z.number().optional().default(1.0).describe("Volume of the audio"),
     suppressSpeech: z.boolean().optional().default(false).describe("Suppress speech generation"),
+    movieVolume: z.number().min(0).max(1).optional().describe("Default movie audio volume for all beats"),
+    ttsVolume: z.number().min(0).max(2).optional().describe("TTS narration volume before mixing with BGM/movie audio"),
+    ducking: z
+        .object({
+        ratio: z.number().min(0).max(1).optional().describe("Movie volume ratio during TTS beats (default 0.3)"),
+    })
+        .optional()
+        .describe("Auto-reduce movie audio when TTS is playing"),
 })
     .strict();
 export const htmlPromptParamsSchema = z