npm - mulmocast - Versions diffs - 2.6.4 → 2.6.6 - Mend

mulmocast 2.6.4 → 2.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/lib/actions/graph_option.d.ts +3 -0
package/lib/actions/graph_option.js +18 -0
package/lib/actions/image_references.d.ts +1 -0
package/lib/actions/image_references.js +50 -12
package/lib/actions/images.d.ts +2 -3
package/lib/actions/images.js +4 -17
package/lib/actions/movie.d.ts +3 -0
package/lib/actions/movie.js +38 -5
package/lib/agents/add_bgm_agent.d.ts +10 -0
package/lib/agents/add_bgm_agent.js +26 -4
package/lib/agents/movie_genai_agent.js +1 -1
package/lib/agents/movie_replicate_agent.js +29 -5
package/lib/types/provider2agent.d.ts +2 -0
package/lib/types/provider2agent.js +78 -5
package/lib/types/schema.d.ts +166 -4
package/lib/types/schema.js +11 -1
package/lib/utils/context.d.ts +72 -2
package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
package/lib/utils/image_plugins/html_tailwind.js +56 -5
package/package.json +13 -12
package/scripts/test/fixtures/movie_tone_high.mov +0 -0
package/scripts/test/fixtures/movie_tone_low.mov +0 -0
package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
package/scripts/test/test_audio_mix.json +91 -0
package/scripts/test/test_audio_mix_beat_vol.json +100 -0
package/scripts/test/test_audio_mix_ducking.json +91 -0
package/scripts/test/test_audio_mix_legacy.json +90 -0
package/scripts/test/test_grok.json +57 -0
package/scripts/test/test_image_prompt_reference.json +55 -0
package/scripts/test/test_image_references.json +74 -0
package/scripts/test/test_kling_v3.json +54 -0
package/scripts/test/test_kling_v3_omni.json +54 -0
package/scripts/test/test_lipsync2.json +48 -52
package/scripts/test/test_lipsync5.json +66 -0
package/scripts/test/test_runway.json +54 -0
package/scripts/test/test_threejs.json +241 -0
package/scripts/test/test_threejs_glb.json +154 -0
package/scripts/test/test_veo31_lite.json +39 -0

package/lib/actions/graph_option.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { GraphOptions } from "graphai";
+import { MulmoStudioContext } from "../types/index.js";
+export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;

package/lib/actions/graph_option.js ADDED Viewed

@@ -0,0 +1,18 @@
+import { TaskManager } from "graphai";
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
+import { fileCacheAgentFilter } from "../utils/filters.js";
+import { settings2GraphAIConfig } from "../utils/utils.js";
+export const graphOption = async (context, settings) => {
+    const options = {
+        agentFilters: [
+            {
+                name: "fileCacheAgentFilter",
+                agent: fileCacheAgentFilter,
+                nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
+            },
+        ],
+        taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
+        config: settings2GraphAIConfig(settings, process.env),
+    };
+    return options;
+};

package/lib/actions/image_references.d.ts CHANGED Viewed

@@ -4,6 +4,7 @@ export declare const generateReferenceImage: (inputs: {
     key: string;
     index: number;
     image: MulmoImagePromptMedia;
+    referenceImagePath?: string;
     force?: boolean;
 }) => Promise<string>;
 export type MediaRefs = {

package/lib/actions/image_references.js CHANGED Viewed

@@ -1,18 +1,19 @@
 import { GraphAI, GraphAILogger } from "graphai";
 import { getReferenceImagePath } from "../utils/file.js";
-import { graphOption } from "./images.js";
+import { graphOption } from "./graph_option.js";
 import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
 import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
 import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
 // public api
 // Application may call this function directly to generate reference image.
 export const generateReferenceImage = async (inputs) => {
-    const { context, key, index, image, force } = inputs;
+    const { context, key, index, image, referenceImagePath, force } = inputs;
     const imagePath = getReferenceImagePath(context, key, "png");
     // generate image
     const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
     const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
     GraphAILogger.info(`Generating reference image for ${key}: ${prompt}`);
+    const referenceImages = referenceImagePath ? [referenceImagePath] : undefined;
     const image_graph_data = {
         version: 0.5,
         nodes: {
@@ -22,6 +23,7 @@ export const generateReferenceImage = async (inputs) => {
                 inputs: {
                     media: "image",
                     prompt,
+                    referenceImages,
                     cache: {
                         force: [context.force, force ?? false],
                         file: imagePath,
@@ -58,12 +60,14 @@ export const getMediaRefs = async (context) => {
     }
     const imageRefs = {};
     const movieRefs = {};
+    // Stage 1: resolve non-referencing entries (image, imagePrompt without referenceImageName, movie)
     await Promise.all(Object.keys(images)
         .sort()
         .map(async (key, index) => {
         const image = images[key];
-        if (image.type === "imagePrompt") {
-            imageRefs[key] = await generateReferenceImage({ context, key, index, image, force: false });
+        if (image.type === "imagePrompt" && !image.referenceImageName) {
+            const refPath = image.referenceImage ? await MulmoMediaSourceMethods.imageReference(image.referenceImage, context, key) : undefined;
+            imageRefs[key] = await generateReferenceImage({ context, key, index, image, referenceImagePath: refPath, force: false });
         }
         else if (image.type === "image") {
             imageRefs[key] = await MulmoMediaSourceMethods.imageReference(image.source, context, key);
@@ -72,10 +76,23 @@ export const getMediaRefs = async (context) => {
             movieRefs[key] = await resolveMovieReference(image, context, key);
         }
     }));
+    // Stage 2: resolve imagePrompt with referenceImageName (depends on Stage 1 results)
+    await Promise.all(Object.keys(images)
+        .sort()
+        .map(async (key, index) => {
+        const image = images[key];
+        if (image.type === "imagePrompt" && image.referenceImageName) {
+            const refPath = imageRefs[image.referenceImageName];
+            if (!refPath) {
+                GraphAILogger.warn(`imagePrompt "${key}": referenceImageName "${image.referenceImageName}" not found in imageRefs — generating without reference`);
+            }
+            imageRefs[key] = await generateReferenceImage({ context, key, index, image, referenceImagePath: refPath, force: false });
+        }
+    }));
     return { imageRefs, movieRefs };
 };
-const resolveMovieReference = async (movie, context, key) => {
-    return MulmoMediaSourceMethods.imageReference(movie.source, context, key);
+const resolveMovieReference = async (media, context, key) => {
+    return MulmoMediaSourceMethods.imageReference(media.source, context, key);
 };
 const generateReferenceMovie = async (inputs) => {
     const { context, key, index, moviePrompt, imagePath } = inputs;
@@ -124,17 +141,19 @@ const generateReferenceMovie = async (inputs) => {
 const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) => {
     const localImageRefs = {};
     const localMovieRefs = {};
-    // Stage 1: image, imagePrompt, movie (parallel)
+    // Stage 1: image, imagePrompt (without referenceImageName), movie (parallel)
     await Promise.all(Object.keys(images)
         .sort()
         .map(async (key, i) => {
         const entry = images[key];
-        if (entry.type === "imagePrompt") {
+        if (entry.type === "imagePrompt" && !entry.referenceImageName) {
+            const refPath = entry.referenceImage ? await MulmoMediaSourceMethods.imageReference(entry.referenceImage, context, key) : undefined;
             localImageRefs[key] = await generateReferenceImage({
                 context,
                 key,
                 index: beatIndex * 100 + i,
                 image: entry,
+                referenceImagePath: refPath,
             });
         }
         else if (entry.type === "image") {
@@ -144,20 +163,39 @@ const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) =>
             localMovieRefs[key] = await resolveMovieReference(entry, context, key);
         }
     }));
-    // Stage 2: moviePrompt (imageName references imageRefs only)
+    // Stage 2: imagePrompt with referenceImageName (depends on Stage 1)
+    const combinedImageRefsForImagePrompt = { ...globalImageRefs, ...localImageRefs };
+    await Promise.all(Object.keys(images)
+        .sort()
+        .map(async (key, i) => {
+        const entry = images[key];
+        if (entry.type === "imagePrompt" && entry.referenceImageName) {
+            const refPath = combinedImageRefsForImagePrompt[entry.referenceImageName];
+            if (!refPath) {
+                GraphAILogger.warn(`imagePrompt "${key}": referenceImageName "${entry.referenceImageName}" not found — generating without reference`);
+            }
+            localImageRefs[key] = await generateReferenceImage({
+                context,
+                key,
+                index: beatIndex * 100 + i,
+                image: entry,
+                referenceImagePath: refPath,
+            });
+        }
+    }));
+    // Stage 3: moviePrompt (imageName references imageRefs only)
     const combinedImageRefs = { ...globalImageRefs, ...localImageRefs };
     await Promise.all(Object.keys(images)
         .sort()
         .map(async (key, i) => {
         const entry = images[key];
         if (entry.type === "moviePrompt") {
-            const mp = entry;
-            const refImagePath = mp.imageName ? combinedImageRefs[mp.imageName] : undefined;
+            const refImagePath = entry.imageName ? combinedImageRefs[entry.imageName] : undefined;
             localMovieRefs[key] = await generateReferenceMovie({
                 context,
                 key,
                 index: beatIndex * 100 + i,
-                moviePrompt: mp,
+                moviePrompt: entry,
                 imagePath: refImagePath,
             });
         }

package/lib/actions/images.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { GraphOptions, GraphData } from "graphai";
+import type { GraphData } from "graphai";
 import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
 export declare const beat_graph_data: {
     version: number;
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
     };
 };
 export declare const images_graph_data: GraphData;
-export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
+export { graphOption } from "./graph_option.js";
 type ImageOptions = {
     imageAgents: Record<string, unknown>;
 };
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
         withBackup?: boolean;
     };
 }) => Promise<void>;
-export {};

package/lib/actions/images.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import dotenv from "dotenv";
 import fs from "fs";
-import { GraphAI, GraphAILogger, TaskManager } from "graphai";
+import { GraphAI, GraphAILogger } from "graphai";
 import { AuthenticationError, RateLimitError } from "openai/index.js";
 import * as vanilla from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
 import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
-import { fileCacheAgentFilter } from "../utils/filters.js";
-import { settings2GraphAIConfig } from "../utils/utils.js";
 import { audioCheckerError } from "../utils/error_cause.js";
 import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
 import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
 import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
+import { graphOption } from "./graph_option.js";
 const vanillaAgents = vanilla.default ?? vanilla;
 const imageAgents = {
     imageGenAIAgent,
@@ -432,20 +431,8 @@ export const images_graph_data = {
         },
     },
 };
-export const graphOption = async (context, settings) => {
-    const options = {
-        agentFilters: [
-            {
-                name: "fileCacheAgentFilter",
-                agent: fileCacheAgentFilter,
-                nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
-            },
-        ],
-        taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
-        config: settings2GraphAIConfig(settings, process.env),
-    };
-    return options;
-};
+// graphOption moved to graph_option.ts to break circular dependency with image_references.ts
+export { graphOption } from "./graph_option.js";
 const prepareGenerateImages = async (context) => {
     const fileName = MulmoStudioContextMethods.getFileName(context);
     const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);

package/lib/actions/movie.d.ts CHANGED Viewed

@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
 export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
 export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
 export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
+export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
+export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
+export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
 export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
 export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
     style: "aspectFit" | "aspectFill";

package/lib/actions/movie.js CHANGED Viewed

@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
 const VIDEO_FPS = 30;
+const DEFAULT_DUCKING_RATIO = 0.3;
 export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
     const videoId = `v${inputIndex}`;
     const videoFilters = [];
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
         return nextTransition !== null; // Any transition on next beat requires this beat's last frame
     });
 };
-const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
+export const resolveMovieVolume = (beat, context) => {
+    const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
+    const ducking = context.presentationStyle.audioParams.ducking;
+    const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
+    if (ducking && hasSpeech) {
+        const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
+        return baseMovieVolume * ratio;
+    }
+    return baseMovieVolume;
+};
+export const isExplicitMixMode = (context) => {
+    const audioParams = context.presentationStyle.audioParams;
+    const duckingRequested = audioParams.ducking !== undefined;
+    const speechSuppressed = audioParams.suppressSpeech === true;
+    const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
+    const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
+    return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
+};
+export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
     if (audioIdsFromMovieBeats.length > 0) {
         const mainAudioId = "mainaudio";
         const compositeAudioId = "composite";
         const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
-        FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
-        ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
+        const useExplicitMix = isExplicitMixMode(context);
+        if (useExplicitMix) {
+            // Explicit mode: normalize=0 + limiter.
+            // ttsVolume is applied in addBGMAgent to avoid changing BGM level.
+            // Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
+            const mixedId = "mixed";
+            FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
+            ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
+            // Limiter as failsafe
+            ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
+        }
+        else {
+            // Legacy mode: normalize=1 (current behavior, fully backward compatible)
+            FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
+            ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
+        }
         return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
     }
     return artifactAudioId;
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
             transitionVideoIds.push(transitionVideoId);
         }
         // NOTE: We don't support audio if the speed is not 1.0.
-        const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
+        const movieVolume = resolveMovieVolume(beat, context);
         if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
             // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
             const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
     }
     GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
     const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
-    const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
+    const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
     await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
     const endTime = performance.now();
     GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);

package/lib/agents/add_bgm_agent.d.ts CHANGED Viewed

@@ -1,3 +1,13 @@
 import type { AgentFunctionInfo } from "graphai";
+import { MulmoStudioContext } from "../types/index.js";
+export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
+    useExplicitMix: boolean;
+    voiceVolume: number;
+};
+export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
+    amixNormalize: string;
+    mixedOutputId: string;
+    limiterFilter: string | undefined;
+};
 declare const addBGMAgentInfo: AgentFunctionInfo;
 export default addBGMAgentInfo;

package/lib/agents/add_bgm_agent.js CHANGED Viewed

@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { isFile } from "../utils/file.js";
 import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
+export const resolveAddBgmMixParams = (audioParams) => {
+    const useExplicitMix = audioParams.ttsVolume !== undefined;
+    const ttsVolume = audioParams.ttsVolume ?? 1.0;
+    return {
+        useExplicitMix,
+        voiceVolume: audioParams.audioVolume * ttsVolume,
+    };
+};
+export const resolveAddBgmFilterConfig = (useExplicitMix) => {
+    const amixNormalize = useExplicitMix ? ":normalize=0" : "";
+    return {
+        amixNormalize,
+        mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
+        limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
+    };
+};
 const addBGMAgent = async ({ namedInputs, params, }) => {
     const { voiceFile, outputFile, context } = namedInputs;
     const { musicFile } = params;
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
     const ffmpegContext = FfmpegContextInit();
     const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
     const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
-    ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
-    ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
-    ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
-    ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
+    const audioParams = context.presentationStyle.audioParams;
+    const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
+    ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
+    ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
+    const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
+    ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
+    if (limiterFilter) {
+        ffmpegContext.filterComplex.push(limiterFilter);
+    }
+    ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
     ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
     try {
         await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);

package/lib/agents/movie_genai_agent.js CHANGED Viewed

@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
         model,
         prompt,
         config: {
-            durationSeconds: capabilities?.supportsPersonGeneration === false ? undefined : duration,
+            durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
             aspectRatio,
             personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
         },

package/lib/agents/movie_replicate_agent.js CHANGED Viewed

@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
 import Replicate from "replicate";
 import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
 import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
-async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration) {
+function replicate_get_videoUrl(output) {
+    if (typeof output === "string")
+        return output;
+    if (output && typeof output === "object" && "url" in output)
+        return output.url();
+    return undefined;
+}
+async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
     const replicate = new Replicate({
         auth: apiKey,
     });
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
             input.image = base64Image;
         }
     }
+    // Add reference images if provided and model supports it
+    const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
+    if (referenceImages && referenceImages.length > 0) {
+        if (!referenceImagesParam) {
+            GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
+        }
+        else if (imagePath) {
+            GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
+        }
+        else {
+            input[referenceImagesParam] = referenceImages.map((ref) => {
+                const buffer = readFileSync(ref.imagePath);
+                return `data:image/png;base64,${buffer.toString("base64")}`;
+            });
+        }
+    }
     // Add last frame image if provided and model supports it
     if (lastFrameImagePath) {
         const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
     try {
         const output = await replicate.run(model, { input });
         // Download the generated video
-        if (output && typeof output === "object" && "url" in output) {
-            const videoUrl = output.url();
+        // Some models return a FileOutput object with a url() method; others return a plain string URL.
+        const videoUrl = replicate_get_videoUrl(output);
+        if (videoUrl) {
             const videoResponse = await fetch(videoUrl);
             if (!videoResponse.ok) {
                 throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
     return "9:16";
 };
 export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
-    const { prompt, imagePath, lastFrameImagePath } = namedInputs;
+    const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
     const aspectRatio = getAspectRatio(params.canvasSize);
     const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
     if (!provider2MovieAgent.replicate.modelParams[model]) {
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
         });
     }
     try {
-        const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
+        const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
         if (buffer) {
             return { buffer };
         }

package/lib/types/provider2agent.d.ts CHANGED Viewed

@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
             durations: number[];
             start_image: string | undefined;
             last_image?: string;
+            reference_images_param?: string;
             price_per_sec: number;
         }>;
     };
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
         keyName: string;
         modelParams: Record<string, {
             durations: number[];
+            supportsDuration: boolean;
             supportsLastFrame: boolean;
             supportsReferenceImages: boolean;
             supportsPersonGeneration: boolean;

package/lib/types/provider2agent.js CHANGED Viewed

@@ -92,6 +92,9 @@ export const provider2MovieAgent = {
             "kwaivgi/kling-v2.1-master",
             "google/veo-2",
             "google/veo-3",
+            "google/veo-3.1",
+            "google/veo-3.1-fast",
+            "google/veo-3.1-lite",
             "google/veo-3-fast",
             "minimax/video-01",
             "minimax/hailuo-02",
@@ -99,6 +102,11 @@ export const provider2MovieAgent = {
             "pixverse/pixverse-v4.5",
             "wan-video/wan-2.2-i2v-fast",
             "wan-video/wan-2.2-t2v-fast",
+            "xai/grok-imagine-video",
+            "xai/grok-imagine-r2v",
+            "runwayml/gen-4.5",
+            "kwaivgi/kling-v3-omni-video",
+            "kwaivgi/kling-v3-video",
         ],
         modelParams: {
             "bytedance/seedance-1-lite": {
@@ -138,6 +146,25 @@ export const provider2MovieAgent = {
                 start_image: "image",
                 price_per_sec: 0.75,
             },
+            "google/veo-3.1": {
+                durations: [4, 6, 8],
+                start_image: "image",
+                last_image: "last_frame_image",
+                reference_images_param: "reference_images",
+                price_per_sec: 0.75,
+            },
+            "google/veo-3.1-fast": {
+                durations: [4, 6, 8],
+                start_image: "image",
+                last_image: "last_frame_image",
+                price_per_sec: 0.4,
+            },
+            "google/veo-3.1-lite": {
+                durations: [4, 6, 8],
+                start_image: "image",
+                last_image: "last_frame",
+                price_per_sec: 0.05,
+            },
             "google/veo-3-fast": {
                 durations: [8],
                 start_image: "image",
@@ -175,28 +202,68 @@ export const provider2MovieAgent = {
                 start_image: undefined,
                 price_per_sec: 0.012,
             },
+            "xai/grok-imagine-video": {
+                durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "image",
+                price_per_sec: 0.08,
+            },
+            "xai/grok-imagine-r2v": {
+                durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                start_image: undefined,
+                reference_images_param: "reference_images",
+                price_per_sec: 0.08,
+            },
+            "runwayml/gen-4.5": {
+                durations: [5, 10],
+                start_image: "image",
+                price_per_sec: 0.25,
+            },
+            "kwaivgi/kling-v3-omni-video": {
+                durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "start_image",
+                last_image: "end_image",
+                reference_images_param: "reference_images",
+                price_per_sec: 0.3,
+            },
+            "kwaivgi/kling-v3-video": {
+                durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "start_image",
+                last_image: "end_image",
+                reference_images_param: "reference_images",
+                price_per_sec: 0.3,
+            },
         },
     },
     google: {
         agentName: "movieGenAIAgent",
         defaultModel: "veo-2.0-generate-001",
-        models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
+        models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-lite-generate-preview"],
         keyName: "GEMINI_API_KEY",
         modelParams: {
+            "veo-3.1-lite-generate-preview": {
+                durations: [4, 6, 8],
+                supportsDuration: true,
+                supportsLastFrame: true,
+                supportsReferenceImages: false,
+                supportsPersonGeneration: false,
+            },
             "veo-3.1-generate-preview": {
                 durations: [4, 6, 8],
+                supportsDuration: true,
                 supportsLastFrame: true,
                 supportsReferenceImages: true,
                 supportsPersonGeneration: false,
             },
             "veo-3.0-generate-001": {
-                durations: [4, 6, 8],
+                durations: [8],
+                supportsDuration: false, // Veo 3.0 always generates 8s
                 supportsLastFrame: false,
                 supportsReferenceImages: false,
                 supportsPersonGeneration: false,
             },
             "veo-2.0-generate-001": {
-                durations: [5, 6, 7, 8],
+                durations: [5, 6, 8],
+                supportsDuration: true,
                 supportsLastFrame: false, // Vertex AI only
                 supportsReferenceImages: false,
                 supportsPersonGeneration: true,
@@ -229,7 +296,7 @@ export const provider2LipSyncAgent = {
         agentName: "lipSyncReplicateAgent",
         defaultModel: "bytedance/omni-human",
         keyName: "REPLICATE_API_TOKEN",
-        models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
+        models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human", "pixverse/lipsync"],
         modelParams: {
             "bytedance/latentsync": {
                 identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
@@ -247,14 +314,20 @@ export const provider2LipSyncAgent = {
                 audio: "audio",
                 price_per_sec: 0.14,
             },
+            "pixverse/lipsync": {
+                identifier: "pixverse/lipsync:3ca6d73f4fb9e1d77a4b6e14f8998ee18926e4dc462838e31fa2bb5e662c1e2c",
+                video: "video",
+                audio: "audio",
+            },
             /* NOTE: This model does not work with large base64 urls.
             "sync/lipsync-2": {
               video: "video",
               audio: "audio",
             },
             */
-            /* NOTE: This model does not work well for some unknown reason.
+            /* NOTE: This model does not work with base64 data URIs (error 1201).
             "kwaivgi/kling-lip-sync": {
+              identifier: "kwaivgi/kling-lip-sync:8311467f07043d4b3feb44584d2586bfa2fc70203eca612ed26f84d0b55df3ce",
               video: "video_url",
               audio: "audio_file",
             },