mulmocast 2.6.5 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40):
  1. package/lib/actions/audio.js +7 -35
  2. package/lib/actions/graph_option.d.ts +4 -0
  3. package/lib/actions/graph_option.js +19 -0
  4. package/lib/actions/image_references.js +3 -3
  5. package/lib/actions/images.d.ts +2 -3
  6. package/lib/actions/images.js +5 -19
  7. package/lib/actions/movie.d.ts +3 -0
  8. package/lib/actions/movie.js +38 -5
  9. package/lib/agents/add_bgm_agent.d.ts +10 -0
  10. package/lib/agents/add_bgm_agent.js +26 -4
  11. package/lib/agents/movie_genai_agent.js +1 -1
  12. package/lib/agents/movie_replicate_agent.js +29 -5
  13. package/lib/methods/mulmo_presentation_style.d.ts +5 -1
  14. package/lib/methods/mulmo_presentation_style.js +27 -3
  15. package/lib/types/provider2agent.d.ts +2 -0
  16. package/lib/types/provider2agent.js +78 -5
  17. package/lib/types/schema.d.ts +55 -4
  18. package/lib/types/schema.js +12 -1
  19. package/lib/utils/context.d.ts +28 -2
  20. package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
  21. package/lib/utils/image_plugins/html_tailwind.js +56 -5
  22. package/package.json +18 -16
  23. package/scripts/test/fixtures/movie_tone_high.mov +0 -0
  24. package/scripts/test/fixtures/movie_tone_low.mov +0 -0
  25. package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
  26. package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
  27. package/scripts/test/test_audio_mix.json +91 -0
  28. package/scripts/test/test_audio_mix_beat_vol.json +100 -0
  29. package/scripts/test/test_audio_mix_ducking.json +91 -0
  30. package/scripts/test/test_audio_mix_legacy.json +90 -0
  31. package/scripts/test/test_grok.json +57 -0
  32. package/scripts/test/test_image_references.json +74 -0
  33. package/scripts/test/test_kling_v3.json +54 -0
  34. package/scripts/test/test_kling_v3_omni.json +54 -0
  35. package/scripts/test/test_lipsync2.json +48 -52
  36. package/scripts/test/test_lipsync5.json +66 -0
  37. package/scripts/test/test_runway.json +54 -0
  38. package/scripts/test/test_threejs.json +241 -0
  39. package/scripts/test/test_threejs_glb.json +154 -0
  40. package/scripts/test/test_veo31_lite.json +39 -0
@@ -1,12 +1,11 @@
1
1
  import dotenv from "dotenv";
2
- import { GraphAI, TaskManager, GraphAILogger } from "graphai";
2
+ import { GraphAI, GraphAILogger } from "graphai";
3
3
  import * as agents from "@graphai/vanilla";
4
4
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
5
5
  import { ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, ttsKotodamaAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
6
- import { text2SpeechProviderSchema } from "../types/index.js";
7
- import { fileCacheAgentFilter } from "../utils/filters.js";
6
+ import { audioGraphOption } from "./graph_option.js";
8
7
  import { getAudioArtifactFilePath, getAudioFilePath, getGroupedAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage, } from "../utils/file.js";
9
- import { localizedText, settings2GraphAIConfig } from "../utils/utils.js";
8
+ import { localizedText } from "../utils/utils.js";
10
9
  import { text2hash } from "../utils/utils_node.js";
11
10
  import { provider2TTSAgent } from "../types/provider2agent.js";
12
11
  import { invalidAudioSourceError } from "../utils/error_cause.js";
@@ -210,21 +209,6 @@ export const audio_graph_data = {
210
209
  },
211
210
  },
212
211
  };
213
- const agentFilters = [
214
- {
215
- name: "fileCacheAgentFilter",
216
- agent: fileCacheAgentFilter,
217
- nodeIds: ["tts"],
218
- },
219
- ];
220
- const getConcurrency = (context) => {
221
- // Check if any speaker uses elevenlabs or kotodama (providers that require concurrency = 1)
222
- const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
223
- const provider = text2SpeechProviderSchema.parse(speaker.provider);
224
- return provider2TTSAgent[provider].hasLimitedConcurrency;
225
- });
226
- return hasLimitedConcurrencyProvider ? 1 : 8;
227
- };
228
212
  const audioAgents = {
229
213
  ...vanillaAgents,
230
214
  fileWriteAgent,
@@ -246,9 +230,8 @@ export const generateBeatAudio = async (index, context, args) => {
246
230
  const audioSegmentDirPath = context.fileDirs.grouped ? audioDirPath : resolveDirPath(audioDirPath, fileName);
247
231
  mkdir(outDirPath);
248
232
  mkdir(audioSegmentDirPath);
249
- const config = settings2GraphAIConfig(settings);
250
- const taskManager = new TaskManager(getConcurrency(context));
251
- const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, { agentFilters, taskManager, config });
233
+ const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, await audioGraphOption(context, settings));
234
+ callbacks?.forEach((callback) => graph.registerCallback(callback));
252
235
  graph.injectValue("__mapIndex", index);
253
236
  graph.injectValue("beat", context.studio.script.beats[index]);
254
237
  graph.injectValue("studioBeat", context.studio.beats[index]);
@@ -260,11 +243,6 @@ export const generateBeatAudio = async (index, context, args) => {
260
243
  else {
261
244
  graph.injectValue("lang", context.lang);
262
245
  }
263
- if (callbacks) {
264
- callbacks.forEach((callback) => {
265
- graph.registerCallback(callback);
266
- });
267
- }
268
246
  await graph.run();
269
247
  }
270
248
  catch (error) {
@@ -288,19 +266,13 @@ export const audio = async (context, args) => {
288
266
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
289
267
  mkdir(outDirPath);
290
268
  mkdir(audioSegmentDirPath);
291
- const config = settings2GraphAIConfig(settings, process.env);
292
- const taskManager = new TaskManager(getConcurrency(context));
293
- const graph = new GraphAI(audio_graph_data, audioAgents, { agentFilters, taskManager, config });
269
+ const graph = new GraphAI(audio_graph_data, audioAgents, await audioGraphOption(context, settings));
270
+ callbacks?.forEach((callback) => graph.registerCallback(callback));
294
271
  graph.injectValue("context", context);
295
272
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
296
273
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
297
274
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
298
275
  graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
299
- if (callbacks) {
300
- callbacks.forEach((callback) => {
301
- graph.registerCallback(callback);
302
- });
303
- }
304
276
  const result = await graph.run();
305
277
  writingMessage(audioCombinedFilePath);
306
278
  MulmoStudioContextMethods.setSessionState(context, "audio", false, true);
@@ -0,0 +1,4 @@
1
+ import type { GraphOptions } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const imageGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
4
+ export declare const audioGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
@@ -0,0 +1,19 @@
1
+ import { TaskManager } from "graphai";
2
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
3
+ import { fileCacheAgentFilter } from "../utils/filters.js";
4
+ import { settings2GraphAIConfig } from "../utils/utils.js";
5
+ const createGraphOption = (concurrency, cacheNodeIds, settings) => ({
6
+ agentFilters: [
7
+ {
8
+ name: "fileCacheAgentFilter",
9
+ agent: fileCacheAgentFilter,
10
+ nodeIds: cacheNodeIds,
11
+ },
12
+ ],
13
+ taskManager: new TaskManager(concurrency),
14
+ config: settings2GraphAIConfig(settings, process.env),
15
+ });
16
+ const IMAGE_CACHE_NODE_IDS = ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"];
17
+ const AUDIO_CACHE_NODE_IDS = ["tts"];
18
+ export const imageGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getImageConcurrency(context.presentationStyle), IMAGE_CACHE_NODE_IDS, settings);
19
+ export const audioGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getAudioConcurrency(context.presentationStyle), AUDIO_CACHE_NODE_IDS, settings);
@@ -1,6 +1,6 @@
1
1
  import { GraphAI, GraphAILogger } from "graphai";
2
2
  import { getReferenceImagePath } from "../utils/file.js";
3
- import { graphOption } from "./images.js";
3
+ import { imageGraphOption } from "./graph_option.js";
4
4
  import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
5
5
  import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
6
6
  import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
@@ -41,7 +41,7 @@ export const generateReferenceImage = async (inputs) => {
41
41
  },
42
42
  };
43
43
  try {
44
- const options = await graphOption(context);
44
+ const options = await imageGraphOption(context);
45
45
  const graph = new GraphAI(image_graph_data, { imageGenAIAgent, imageOpenaiAgent, mediaMockAgent, imageReplicateAgent }, options);
46
46
  await graph.run();
47
47
  return imagePath;
@@ -126,7 +126,7 @@ const generateReferenceMovie = async (inputs) => {
126
126
  },
127
127
  };
128
128
  try {
129
- const options = await graphOption(context);
129
+ const options = await imageGraphOption(context);
130
130
  const graph = new GraphAI(movie_graph_data, { movieGenAIAgent, movieReplicateAgent, mediaMockAgent }, options);
131
131
  await graph.run();
132
132
  return moviePath;
@@ -1,4 +1,4 @@
1
- import type { GraphOptions, GraphData } from "graphai";
1
+ import type { GraphData } from "graphai";
2
2
  import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
3
3
  export declare const beat_graph_data: {
4
4
  version: number;
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
463
463
  };
464
464
  };
465
465
  export declare const images_graph_data: GraphData;
466
- export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
466
+ export { imageGraphOption } from "./graph_option.js";
467
467
  type ImageOptions = {
468
468
  imageAgents: Record<string, unknown>;
469
469
  };
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
481
481
  withBackup?: boolean;
482
482
  };
483
483
  }) => Promise<void>;
484
- export {};
@@ -1,6 +1,6 @@
1
1
  import dotenv from "dotenv";
2
2
  import fs from "fs";
3
- import { GraphAI, GraphAILogger, TaskManager } from "graphai";
3
+ import { GraphAI, GraphAILogger } from "graphai";
4
4
  import { AuthenticationError, RateLimitError } from "openai/index.js";
5
5
  import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
12
12
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
13
- import { fileCacheAgentFilter } from "../utils/filters.js";
14
- import { settings2GraphAIConfig } from "../utils/utils.js";
15
13
  import { audioCheckerError } from "../utils/error_cause.js";
16
14
  import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
17
15
  import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
18
16
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
+ import { imageGraphOption } from "./graph_option.js";
19
18
  const vanillaAgents = vanilla.default ?? vanilla;
20
19
  const imageAgents = {
21
20
  imageGenAIAgent,
@@ -432,20 +431,7 @@ export const images_graph_data = {
432
431
  },
433
432
  },
434
433
  };
435
- export const graphOption = async (context, settings) => {
436
- const options = {
437
- agentFilters: [
438
- {
439
- name: "fileCacheAgentFilter",
440
- agent: fileCacheAgentFilter,
441
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
442
- },
443
- ],
444
- taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
445
- config: settings2GraphAIConfig(settings, process.env),
446
- };
447
- return options;
448
- };
434
+ export { imageGraphOption } from "./graph_option.js";
449
435
  const prepareGenerateImages = async (context) => {
450
436
  const fileName = MulmoStudioContextMethods.getFileName(context);
451
437
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -472,7 +458,7 @@ const generateImages = async (context, args) => {
472
458
  ...defaultAgents,
473
459
  ...optionImageAgents,
474
460
  };
475
- const graph = new GraphAI(images_graph_data, graphaiAgent, await graphOption(context, settings));
461
+ const graph = new GraphAI(images_graph_data, graphaiAgent, await imageGraphOption(context, settings));
476
462
  Object.keys(injections).forEach((key) => {
477
463
  graph.injectValue(key, injections[key]);
478
464
  });
@@ -512,7 +498,7 @@ export const generateBeatImage = async (inputs) => {
512
498
  try {
513
499
  const { index, context, args } = inputs;
514
500
  const { settings, callbacks, forceMovie, forceImage, forceLipSync, forceSoundEffect, withBackup } = args ?? {};
515
- const options = await graphOption(context, settings);
501
+ const options = await imageGraphOption(context, settings);
516
502
  const injections = await prepareGenerateImages(context);
517
503
  const graph = new GraphAI(beat_graph_data, defaultAgents, options);
518
504
  Object.keys(injections).forEach((key) => {
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
13
13
  export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
14
14
  export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
15
15
  export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
16
+ export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
17
+ export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
18
+ export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
16
19
  export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
17
20
  export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
18
21
  style: "aspectFit" | "aspectFill";
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
9
9
  // const isMac = process.platform === "darwin";
10
10
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
11
11
  const VIDEO_FPS = 30;
12
+ const DEFAULT_DUCKING_RATIO = 0.3;
12
13
  export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
13
14
  const videoId = `v${inputIndex}`;
14
15
  const videoFilters = [];
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
241
242
  return nextTransition !== null; // Any transition on next beat requires this beat's last frame
242
243
  });
243
244
  };
244
- const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
245
+ export const resolveMovieVolume = (beat, context) => {
246
+ const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
247
+ const ducking = context.presentationStyle.audioParams.ducking;
248
+ const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
249
+ if (ducking && hasSpeech) {
250
+ const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
251
+ return baseMovieVolume * ratio;
252
+ }
253
+ return baseMovieVolume;
254
+ };
255
+ export const isExplicitMixMode = (context) => {
256
+ const audioParams = context.presentationStyle.audioParams;
257
+ const duckingRequested = audioParams.ducking !== undefined;
258
+ const speechSuppressed = audioParams.suppressSpeech === true;
259
+ const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
260
+ const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
261
+ return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
262
+ };
263
+ export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
245
264
  if (audioIdsFromMovieBeats.length > 0) {
246
265
  const mainAudioId = "mainaudio";
247
266
  const compositeAudioId = "composite";
248
267
  const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
249
- FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
250
- ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
268
+ const useExplicitMix = isExplicitMixMode(context);
269
+ if (useExplicitMix) {
270
+ // Explicit mode: normalize=0 + limiter.
271
+ // ttsVolume is applied in addBGMAgent to avoid changing BGM level.
272
+ // Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
273
+ const mixedId = "mixed";
274
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
275
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
276
+ // Limiter as failsafe
277
+ ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
278
+ }
279
+ else {
280
+ // Legacy mode: normalize=1 (current behavior, fully backward compatible)
281
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
282
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
283
+ }
251
284
  return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
252
285
  }
253
286
  return artifactAudioId;
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
420
453
  transitionVideoIds.push(transitionVideoId);
421
454
  }
422
455
  // NOTE: We don't support audio if the speed is not 1.0.
423
- const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
456
+ const movieVolume = resolveMovieVolume(beat, context);
424
457
  if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
425
458
  // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
426
459
  const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
442
475
  }
443
476
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
444
477
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
445
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
478
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
446
479
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
447
480
  const endTime = performance.now();
448
481
  GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
@@ -1,3 +1,13 @@
1
1
  import type { AgentFunctionInfo } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
4
+ useExplicitMix: boolean;
5
+ voiceVolume: number;
6
+ };
7
+ export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
8
+ amixNormalize: string;
9
+ mixedOutputId: string;
10
+ limiterFilter: string | undefined;
11
+ };
2
12
  declare const addBGMAgentInfo: AgentFunctionInfo;
3
13
  export default addBGMAgentInfo;
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
3
3
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
4
4
  import { isFile } from "../utils/file.js";
5
5
  import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
6
+ export const resolveAddBgmMixParams = (audioParams) => {
7
+ const useExplicitMix = audioParams.ttsVolume !== undefined;
8
+ const ttsVolume = audioParams.ttsVolume ?? 1.0;
9
+ return {
10
+ useExplicitMix,
11
+ voiceVolume: audioParams.audioVolume * ttsVolume,
12
+ };
13
+ };
14
+ export const resolveAddBgmFilterConfig = (useExplicitMix) => {
15
+ const amixNormalize = useExplicitMix ? ":normalize=0" : "";
16
+ return {
17
+ amixNormalize,
18
+ mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
19
+ limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
20
+ };
21
+ };
6
22
  const addBGMAgent = async ({ namedInputs, params, }) => {
7
23
  const { voiceFile, outputFile, context } = namedInputs;
8
24
  const { musicFile } = params;
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
24
40
  const ffmpegContext = FfmpegContextInit();
25
41
  const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
26
42
  const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
27
- ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
28
- ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
29
- ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
30
- ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
43
+ const audioParams = context.presentationStyle.audioParams;
44
+ const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
45
+ ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
46
+ ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
47
+ const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
48
+ ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
49
+ if (limiterFilter) {
50
+ ffmpegContext.filterComplex.push(limiterFilter);
51
+ }
52
+ ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
31
53
  ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
32
54
  try {
33
55
  await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
100
100
  model,
101
101
  prompt,
102
102
  config: {
103
- durationSeconds: capabilities?.supportsPersonGeneration === false ? undefined : duration,
103
+ durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
104
104
  aspectRatio,
105
105
  personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
106
106
  },
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
5
  import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
6
- async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration) {
6
+ function replicate_get_videoUrl(output) {
7
+ if (typeof output === "string")
8
+ return output;
9
+ if (output && typeof output === "object" && "url" in output)
10
+ return output.url();
11
+ return undefined;
12
+ }
13
+ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
7
14
  const replicate = new Replicate({
8
15
  auth: apiKey,
9
16
  });
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
37
44
  input.image = base64Image;
38
45
  }
39
46
  }
47
+ // Add reference images if provided and model supports it
48
+ const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
49
+ if (referenceImages && referenceImages.length > 0) {
50
+ if (!referenceImagesParam) {
51
+ GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
52
+ }
53
+ else if (imagePath) {
54
+ GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
55
+ }
56
+ else {
57
+ input[referenceImagesParam] = referenceImages.map((ref) => {
58
+ const buffer = readFileSync(ref.imagePath);
59
+ return `data:image/png;base64,${buffer.toString("base64")}`;
60
+ });
61
+ }
62
+ }
40
63
  // Add last frame image if provided and model supports it
41
64
  if (lastFrameImagePath) {
42
65
  const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
57
80
  try {
58
81
  const output = await replicate.run(model, { input });
59
82
  // Download the generated video
60
- if (output && typeof output === "object" && "url" in output) {
61
- const videoUrl = output.url();
83
+ // Some models return a FileOutput object with a url() method; others return a plain string URL.
84
+ const videoUrl = replicate_get_videoUrl(output);
85
+ if (videoUrl) {
62
86
  const videoResponse = await fetch(videoUrl);
63
87
  if (!videoResponse.ok) {
64
88
  throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
89
113
  return "9:16";
90
114
  };
91
115
  export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
92
- const { prompt, imagePath, lastFrameImagePath } = namedInputs;
116
+ const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
93
117
  const aspectRatio = getAspectRatio(params.canvasSize);
94
118
  const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
95
119
  if (!provider2MovieAgent.replicate.modelParams[model]) {
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
110
134
  });
111
135
  }
112
136
  try {
113
- const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
137
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
114
138
  if (buffer) {
115
139
  return { buffer };
116
140
  }
@@ -179,6 +179,7 @@ export declare const MulmoPresentationStyleMethods: {
179
179
  imageName: string;
180
180
  referenceType: "ASSET" | "STYLE";
181
181
  }[] | undefined;
182
+ concurrency?: number | undefined;
182
183
  speed?: number | undefined;
183
184
  };
184
185
  keyName: string;
@@ -204,7 +205,10 @@ export declare const MulmoPresentationStyleMethods: {
204
205
  image?: string;
205
206
  }>;
206
207
  };
207
- getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
208
+ /** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
209
+ getImageConcurrency(presentationStyle: MulmoPresentationStyle): number;
210
+ /** Concurrency for audio/TTS generation graph */
211
+ getAudioConcurrency(presentationStyle: MulmoPresentationStyle): number;
208
212
  getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
209
213
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
210
214
  };
@@ -6,10 +6,10 @@
6
6
  import { isNull } from "graphai";
7
7
  import { userAssert } from "../utils/utils.js";
8
8
  import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
9
- import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
9
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2TTSAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
10
10
  const defaultTextSlideStyles = [
11
11
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
12
- "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
12
+ "body { margin: 60px; margin-top: 40px; color:#333; background-color:#fff; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
13
13
  "h1 { font-size: 56px; margin-bottom: 20px; text-align: center }",
14
14
  "h2 { font-size: 48px; text-align: center }",
15
15
  "h3 { font-size: 36px }",
@@ -119,7 +119,17 @@ export const MulmoPresentationStyleMethods = {
119
119
  const agentInfo = provider2LipSyncAgent[lipSyncProvider];
120
120
  return agentInfo;
121
121
  },
122
- getConcurrency(presentationStyle) {
122
+ /** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
123
+ getImageConcurrency(presentationStyle) {
124
+ const imageConcurrency = presentationStyle.imageParams?.concurrency;
125
+ const movieConcurrency = presentationStyle.movieParams?.concurrency;
126
+ // User-specified concurrency takes precedence.
127
+ // Use the smaller of imageParams/movieParams since they share the same graph.
128
+ if (imageConcurrency !== undefined || movieConcurrency !== undefined) {
129
+ const values = [imageConcurrency, movieConcurrency].filter((v) => v !== undefined);
130
+ return Math.min(...values);
131
+ }
132
+ // Fallback: provider-based auto-detection
123
133
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
124
134
  if (imageAgentInfo.imageParams.provider === "openai") {
125
135
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -131,6 +141,20 @@ export const MulmoPresentationStyleMethods = {
131
141
  }
132
142
  return 4;
133
143
  },
144
+ /** Concurrency for audio/TTS generation graph */
145
+ getAudioConcurrency(presentationStyle) {
146
+ // User-specified concurrency takes precedence
147
+ const userConcurrency = presentationStyle.audioParams?.concurrency;
148
+ if (userConcurrency !== undefined) {
149
+ return userConcurrency;
150
+ }
151
+ // Fallback: provider-based auto-detection
152
+ const hasLimitedConcurrencyProvider = Object.values(presentationStyle.speechParams.speakers).some((speaker) => {
153
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
154
+ return provider2TTSAgent[provider].hasLimitedConcurrency;
155
+ });
156
+ return hasLimitedConcurrencyProvider ? 1 : 8;
157
+ },
134
158
  getHtmlImageAgentInfo(presentationStyle) {
135
159
  const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
136
160
  const defaultConfig = provider2LLMAgent[provider];
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
80
80
  durations: number[];
81
81
  start_image: string | undefined;
82
82
  last_image?: string;
83
+ reference_images_param?: string;
83
84
  price_per_sec: number;
84
85
  }>;
85
86
  };
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
90
91
  keyName: string;
91
92
  modelParams: Record<string, {
92
93
  durations: number[];
94
+ supportsDuration: boolean;
93
95
  supportsLastFrame: boolean;
94
96
  supportsReferenceImages: boolean;
95
97
  supportsPersonGeneration: boolean;