mulmocast 2.6.5 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40):
  1. package/lib/actions/audio.js +7 -35
  2. package/lib/actions/graph_option.d.ts +4 -0
  3. package/lib/actions/graph_option.js +19 -0
  4. package/lib/actions/image_references.js +3 -3
  5. package/lib/actions/images.d.ts +2 -3
  6. package/lib/actions/images.js +5 -19
  7. package/lib/actions/movie.d.ts +3 -0
  8. package/lib/actions/movie.js +38 -5
  9. package/lib/agents/add_bgm_agent.d.ts +10 -0
  10. package/lib/agents/add_bgm_agent.js +26 -4
  11. package/lib/agents/movie_genai_agent.js +1 -1
  12. package/lib/agents/movie_replicate_agent.js +29 -5
  13. package/lib/methods/mulmo_presentation_style.d.ts +5 -1
  14. package/lib/methods/mulmo_presentation_style.js +27 -3
  15. package/lib/types/provider2agent.d.ts +2 -0
  16. package/lib/types/provider2agent.js +78 -5
  17. package/lib/types/schema.d.ts +55 -4
  18. package/lib/types/schema.js +12 -1
  19. package/lib/utils/context.d.ts +28 -2
  20. package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
  21. package/lib/utils/image_plugins/html_tailwind.js +56 -5
  22. package/package.json +18 -16
  23. package/scripts/test/fixtures/movie_tone_high.mov +0 -0
  24. package/scripts/test/fixtures/movie_tone_low.mov +0 -0
  25. package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
  26. package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
  27. package/scripts/test/test_audio_mix.json +91 -0
  28. package/scripts/test/test_audio_mix_beat_vol.json +100 -0
  29. package/scripts/test/test_audio_mix_ducking.json +91 -0
  30. package/scripts/test/test_audio_mix_legacy.json +90 -0
  31. package/scripts/test/test_grok.json +57 -0
  32. package/scripts/test/test_image_references.json +74 -0
  33. package/scripts/test/test_kling_v3.json +54 -0
  34. package/scripts/test/test_kling_v3_omni.json +54 -0
  35. package/scripts/test/test_lipsync2.json +48 -52
  36. package/scripts/test/test_lipsync5.json +66 -0
  37. package/scripts/test/test_runway.json +54 -0
  38. package/scripts/test/test_threejs.json +241 -0
  39. package/scripts/test/test_threejs_glb.json +154 -0
  40. package/scripts/test/test_veo31_lite.json +39 -0
@@ -1,12 +1,11 @@
1
1
  import dotenv from "dotenv";
2
- import { GraphAI, TaskManager, GraphAILogger } from "graphai";
2
+ import { GraphAI, GraphAILogger } from "graphai";
3
3
  import * as agents from "@graphai/vanilla";
4
4
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
5
5
  import { ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, ttsKotodamaAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
6
- import { text2SpeechProviderSchema } from "../types/index.js";
7
- import { fileCacheAgentFilter } from "../utils/filters.js";
6
+ import { audioGraphOption } from "./graph_option.js";
8
7
  import { getAudioArtifactFilePath, getAudioFilePath, getGroupedAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage, } from "../utils/file.js";
9
- import { localizedText, settings2GraphAIConfig } from "../utils/utils.js";
8
+ import { localizedText } from "../utils/utils.js";
10
9
  import { text2hash } from "../utils/utils_node.js";
11
10
  import { provider2TTSAgent } from "../types/provider2agent.js";
12
11
  import { invalidAudioSourceError } from "../utils/error_cause.js";
@@ -210,21 +209,6 @@ export const audio_graph_data = {
210
209
  },
211
210
  },
212
211
  };
213
- const agentFilters = [
214
- {
215
- name: "fileCacheAgentFilter",
216
- agent: fileCacheAgentFilter,
217
- nodeIds: ["tts"],
218
- },
219
- ];
220
- const getConcurrency = (context) => {
221
- // Check if any speaker uses elevenlabs or kotodama (providers that require concurrency = 1)
222
- const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
223
- const provider = text2SpeechProviderSchema.parse(speaker.provider);
224
- return provider2TTSAgent[provider].hasLimitedConcurrency;
225
- });
226
- return hasLimitedConcurrencyProvider ? 1 : 8;
227
- };
228
212
  const audioAgents = {
229
213
  ...vanillaAgents,
230
214
  fileWriteAgent,
@@ -246,9 +230,8 @@ export const generateBeatAudio = async (index, context, args) => {
246
230
  const audioSegmentDirPath = context.fileDirs.grouped ? audioDirPath : resolveDirPath(audioDirPath, fileName);
247
231
  mkdir(outDirPath);
248
232
  mkdir(audioSegmentDirPath);
249
- const config = settings2GraphAIConfig(settings);
250
- const taskManager = new TaskManager(getConcurrency(context));
251
- const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, { agentFilters, taskManager, config });
233
+ const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, await audioGraphOption(context, settings));
234
+ callbacks?.forEach((callback) => graph.registerCallback(callback));
252
235
  graph.injectValue("__mapIndex", index);
253
236
  graph.injectValue("beat", context.studio.script.beats[index]);
254
237
  graph.injectValue("studioBeat", context.studio.beats[index]);
@@ -260,11 +243,6 @@ export const generateBeatAudio = async (index, context, args) => {
260
243
  else {
261
244
  graph.injectValue("lang", context.lang);
262
245
  }
263
- if (callbacks) {
264
- callbacks.forEach((callback) => {
265
- graph.registerCallback(callback);
266
- });
267
- }
268
246
  await graph.run();
269
247
  }
270
248
  catch (error) {
@@ -288,19 +266,13 @@ export const audio = async (context, args) => {
288
266
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
289
267
  mkdir(outDirPath);
290
268
  mkdir(audioSegmentDirPath);
291
- const config = settings2GraphAIConfig(settings, process.env);
292
- const taskManager = new TaskManager(getConcurrency(context));
293
- const graph = new GraphAI(audio_graph_data, audioAgents, { agentFilters, taskManager, config });
269
+ const graph = new GraphAI(audio_graph_data, audioAgents, await audioGraphOption(context, settings));
270
+ callbacks?.forEach((callback) => graph.registerCallback(callback));
294
271
  graph.injectValue("context", context);
295
272
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
296
273
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
297
274
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
298
275
  graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
299
- if (callbacks) {
300
- callbacks.forEach((callback) => {
301
- graph.registerCallback(callback);
302
- });
303
- }
304
276
  const result = await graph.run();
305
277
  writingMessage(audioCombinedFilePath);
306
278
  MulmoStudioContextMethods.setSessionState(context, "audio", false, true);
@@ -0,0 +1,4 @@
1
+ import type { GraphOptions } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const imageGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
4
+ export declare const audioGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
@@ -0,0 +1,19 @@
1
+ import { TaskManager } from "graphai";
2
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
3
+ import { fileCacheAgentFilter } from "../utils/filters.js";
4
+ import { settings2GraphAIConfig } from "../utils/utils.js";
5
+ const createGraphOption = (concurrency, cacheNodeIds, settings) => ({
6
+ agentFilters: [
7
+ {
8
+ name: "fileCacheAgentFilter",
9
+ agent: fileCacheAgentFilter,
10
+ nodeIds: cacheNodeIds,
11
+ },
12
+ ],
13
+ taskManager: new TaskManager(concurrency),
14
+ config: settings2GraphAIConfig(settings, process.env),
15
+ });
16
+ const IMAGE_CACHE_NODE_IDS = ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"];
17
+ const AUDIO_CACHE_NODE_IDS = ["tts"];
18
+ export const imageGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getImageConcurrency(context.presentationStyle), IMAGE_CACHE_NODE_IDS, settings);
19
+ export const audioGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getAudioConcurrency(context.presentationStyle), AUDIO_CACHE_NODE_IDS, settings);
@@ -1,6 +1,6 @@
1
1
  import { GraphAI, GraphAILogger } from "graphai";
2
2
  import { getReferenceImagePath } from "../utils/file.js";
3
- import { graphOption } from "./images.js";
3
+ import { imageGraphOption } from "./graph_option.js";
4
4
  import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
5
5
  import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
6
6
  import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
@@ -41,7 +41,7 @@ export const generateReferenceImage = async (inputs) => {
41
41
  },
42
42
  };
43
43
  try {
44
- const options = await graphOption(context);
44
+ const options = await imageGraphOption(context);
45
45
  const graph = new GraphAI(image_graph_data, { imageGenAIAgent, imageOpenaiAgent, mediaMockAgent, imageReplicateAgent }, options);
46
46
  await graph.run();
47
47
  return imagePath;
@@ -126,7 +126,7 @@ const generateReferenceMovie = async (inputs) => {
126
126
  },
127
127
  };
128
128
  try {
129
- const options = await graphOption(context);
129
+ const options = await imageGraphOption(context);
130
130
  const graph = new GraphAI(movie_graph_data, { movieGenAIAgent, movieReplicateAgent, mediaMockAgent }, options);
131
131
  await graph.run();
132
132
  return moviePath;
@@ -1,4 +1,4 @@
1
- import type { GraphOptions, GraphData } from "graphai";
1
+ import type { GraphData } from "graphai";
2
2
  import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
3
3
  export declare const beat_graph_data: {
4
4
  version: number;
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
463
463
  };
464
464
  };
465
465
  export declare const images_graph_data: GraphData;
466
- export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
466
+ export { imageGraphOption } from "./graph_option.js";
467
467
  type ImageOptions = {
468
468
  imageAgents: Record<string, unknown>;
469
469
  };
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
481
481
  withBackup?: boolean;
482
482
  };
483
483
  }) => Promise<void>;
484
- export {};
@@ -1,6 +1,6 @@
1
1
  import dotenv from "dotenv";
2
2
  import fs from "fs";
3
- import { GraphAI, GraphAILogger, TaskManager } from "graphai";
3
+ import { GraphAI, GraphAILogger } from "graphai";
4
4
  import { AuthenticationError, RateLimitError } from "openai/index.js";
5
5
  import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
12
12
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
13
- import { fileCacheAgentFilter } from "../utils/filters.js";
14
- import { settings2GraphAIConfig } from "../utils/utils.js";
15
13
  import { audioCheckerError } from "../utils/error_cause.js";
16
14
  import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
17
15
  import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
18
16
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
+ import { imageGraphOption } from "./graph_option.js";
19
18
  const vanillaAgents = vanilla.default ?? vanilla;
20
19
  const imageAgents = {
21
20
  imageGenAIAgent,
@@ -432,20 +431,7 @@ export const images_graph_data = {
432
431
  },
433
432
  },
434
433
  };
435
- export const graphOption = async (context, settings) => {
436
- const options = {
437
- agentFilters: [
438
- {
439
- name: "fileCacheAgentFilter",
440
- agent: fileCacheAgentFilter,
441
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
442
- },
443
- ],
444
- taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
445
- config: settings2GraphAIConfig(settings, process.env),
446
- };
447
- return options;
448
- };
434
+ export { imageGraphOption } from "./graph_option.js";
449
435
  const prepareGenerateImages = async (context) => {
450
436
  const fileName = MulmoStudioContextMethods.getFileName(context);
451
437
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -472,7 +458,7 @@ const generateImages = async (context, args) => {
472
458
  ...defaultAgents,
473
459
  ...optionImageAgents,
474
460
  };
475
- const graph = new GraphAI(images_graph_data, graphaiAgent, await graphOption(context, settings));
461
+ const graph = new GraphAI(images_graph_data, graphaiAgent, await imageGraphOption(context, settings));
476
462
  Object.keys(injections).forEach((key) => {
477
463
  graph.injectValue(key, injections[key]);
478
464
  });
@@ -512,7 +498,7 @@ export const generateBeatImage = async (inputs) => {
512
498
  try {
513
499
  const { index, context, args } = inputs;
514
500
  const { settings, callbacks, forceMovie, forceImage, forceLipSync, forceSoundEffect, withBackup } = args ?? {};
515
- const options = await graphOption(context, settings);
501
+ const options = await imageGraphOption(context, settings);
516
502
  const injections = await prepareGenerateImages(context);
517
503
  const graph = new GraphAI(beat_graph_data, defaultAgents, options);
518
504
  Object.keys(injections).forEach((key) => {
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
13
13
  export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
14
14
  export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
15
15
  export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
16
+ export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
17
+ export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
18
+ export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
16
19
  export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
17
20
  export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
18
21
  style: "aspectFit" | "aspectFill";
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
9
9
  // const isMac = process.platform === "darwin";
10
10
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
11
11
  const VIDEO_FPS = 30;
12
+ const DEFAULT_DUCKING_RATIO = 0.3;
12
13
  export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
13
14
  const videoId = `v${inputIndex}`;
14
15
  const videoFilters = [];
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
241
242
  return nextTransition !== null; // Any transition on next beat requires this beat's last frame
242
243
  });
243
244
  };
244
- const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
245
+ export const resolveMovieVolume = (beat, context) => {
246
+ const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
247
+ const ducking = context.presentationStyle.audioParams.ducking;
248
+ const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
249
+ if (ducking && hasSpeech) {
250
+ const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
251
+ return baseMovieVolume * ratio;
252
+ }
253
+ return baseMovieVolume;
254
+ };
255
+ export const isExplicitMixMode = (context) => {
256
+ const audioParams = context.presentationStyle.audioParams;
257
+ const duckingRequested = audioParams.ducking !== undefined;
258
+ const speechSuppressed = audioParams.suppressSpeech === true;
259
+ const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
260
+ const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
261
+ return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
262
+ };
263
+ export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
245
264
  if (audioIdsFromMovieBeats.length > 0) {
246
265
  const mainAudioId = "mainaudio";
247
266
  const compositeAudioId = "composite";
248
267
  const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
249
- FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
250
- ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
268
+ const useExplicitMix = isExplicitMixMode(context);
269
+ if (useExplicitMix) {
270
+ // Explicit mode: normalize=0 + limiter.
271
+ // ttsVolume is applied in addBGMAgent to avoid changing BGM level.
272
+ // Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
273
+ const mixedId = "mixed";
274
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
275
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
276
+ // Limiter as failsafe
277
+ ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
278
+ }
279
+ else {
280
+ // Legacy mode: normalize=1 (current behavior, fully backward compatible)
281
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
282
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
283
+ }
251
284
  return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
252
285
  }
253
286
  return artifactAudioId;
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
420
453
  transitionVideoIds.push(transitionVideoId);
421
454
  }
422
455
  // NOTE: We don't support audio if the speed is not 1.0.
423
- const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
456
+ const movieVolume = resolveMovieVolume(beat, context);
424
457
  if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
425
458
  // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
426
459
  const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
442
475
  }
443
476
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
444
477
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
445
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
478
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
446
479
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
447
480
  const endTime = performance.now();
448
481
  GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
@@ -1,3 +1,13 @@
1
1
  import type { AgentFunctionInfo } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
4
+ useExplicitMix: boolean;
5
+ voiceVolume: number;
6
+ };
7
+ export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
8
+ amixNormalize: string;
9
+ mixedOutputId: string;
10
+ limiterFilter: string | undefined;
11
+ };
2
12
  declare const addBGMAgentInfo: AgentFunctionInfo;
3
13
  export default addBGMAgentInfo;
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
3
3
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
4
4
  import { isFile } from "../utils/file.js";
5
5
  import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
6
+ export const resolveAddBgmMixParams = (audioParams) => {
7
+ const useExplicitMix = audioParams.ttsVolume !== undefined;
8
+ const ttsVolume = audioParams.ttsVolume ?? 1.0;
9
+ return {
10
+ useExplicitMix,
11
+ voiceVolume: audioParams.audioVolume * ttsVolume,
12
+ };
13
+ };
14
+ export const resolveAddBgmFilterConfig = (useExplicitMix) => {
15
+ const amixNormalize = useExplicitMix ? ":normalize=0" : "";
16
+ return {
17
+ amixNormalize,
18
+ mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
19
+ limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
20
+ };
21
+ };
6
22
  const addBGMAgent = async ({ namedInputs, params, }) => {
7
23
  const { voiceFile, outputFile, context } = namedInputs;
8
24
  const { musicFile } = params;
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
24
40
  const ffmpegContext = FfmpegContextInit();
25
41
  const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
26
42
  const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
27
- ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
28
- ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
29
- ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
30
- ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
43
+ const audioParams = context.presentationStyle.audioParams;
44
+ const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
45
+ ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
46
+ ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
47
+ const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
48
+ ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
49
+ if (limiterFilter) {
50
+ ffmpegContext.filterComplex.push(limiterFilter);
51
+ }
52
+ ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
31
53
  ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
32
54
  try {
33
55
  await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
100
100
  model,
101
101
  prompt,
102
102
  config: {
103
- durationSeconds: capabilities?.supportsPersonGeneration === false ? undefined : duration,
103
+ durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
104
104
  aspectRatio,
105
105
  personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
106
106
  },
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
5
  import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
6
- async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration) {
6
+ function replicate_get_videoUrl(output) {
7
+ if (typeof output === "string")
8
+ return output;
9
+ if (output && typeof output === "object" && "url" in output)
10
+ return output.url();
11
+ return undefined;
12
+ }
13
+ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
7
14
  const replicate = new Replicate({
8
15
  auth: apiKey,
9
16
  });
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
37
44
  input.image = base64Image;
38
45
  }
39
46
  }
47
+ // Add reference images if provided and model supports it
48
+ const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
49
+ if (referenceImages && referenceImages.length > 0) {
50
+ if (!referenceImagesParam) {
51
+ GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
52
+ }
53
+ else if (imagePath) {
54
+ GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
55
+ }
56
+ else {
57
+ input[referenceImagesParam] = referenceImages.map((ref) => {
58
+ const buffer = readFileSync(ref.imagePath);
59
+ return `data:image/png;base64,${buffer.toString("base64")}`;
60
+ });
61
+ }
62
+ }
40
63
  // Add last frame image if provided and model supports it
41
64
  if (lastFrameImagePath) {
42
65
  const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
57
80
  try {
58
81
  const output = await replicate.run(model, { input });
59
82
  // Download the generated video
60
- if (output && typeof output === "object" && "url" in output) {
61
- const videoUrl = output.url();
83
+ // Some models return a FileOutput object with a url() method; others return a plain string URL.
84
+ const videoUrl = replicate_get_videoUrl(output);
85
+ if (videoUrl) {
62
86
  const videoResponse = await fetch(videoUrl);
63
87
  if (!videoResponse.ok) {
64
88
  throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
89
113
  return "9:16";
90
114
  };
91
115
  export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
92
- const { prompt, imagePath, lastFrameImagePath } = namedInputs;
116
+ const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
93
117
  const aspectRatio = getAspectRatio(params.canvasSize);
94
118
  const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
95
119
  if (!provider2MovieAgent.replicate.modelParams[model]) {
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
110
134
  });
111
135
  }
112
136
  try {
113
- const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
137
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
114
138
  if (buffer) {
115
139
  return { buffer };
116
140
  }
@@ -179,6 +179,7 @@ export declare const MulmoPresentationStyleMethods: {
179
179
  imageName: string;
180
180
  referenceType: "ASSET" | "STYLE";
181
181
  }[] | undefined;
182
+ concurrency?: number | undefined;
182
183
  speed?: number | undefined;
183
184
  };
184
185
  keyName: string;
@@ -204,7 +205,10 @@ export declare const MulmoPresentationStyleMethods: {
204
205
  image?: string;
205
206
  }>;
206
207
  };
207
- getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
208
+ /** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
209
+ getImageConcurrency(presentationStyle: MulmoPresentationStyle): number;
210
+ /** Concurrency for audio/TTS generation graph */
211
+ getAudioConcurrency(presentationStyle: MulmoPresentationStyle): number;
208
212
  getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
209
213
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
210
214
  };
@@ -6,10 +6,10 @@
6
6
  import { isNull } from "graphai";
7
7
  import { userAssert } from "../utils/utils.js";
8
8
  import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
9
- import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
9
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2TTSAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
10
10
  const defaultTextSlideStyles = [
11
11
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
12
- "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
12
+ "body { margin: 60px; margin-top: 40px; color:#333; background-color:#fff; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
13
13
  "h1 { font-size: 56px; margin-bottom: 20px; text-align: center }",
14
14
  "h2 { font-size: 48px; text-align: center }",
15
15
  "h3 { font-size: 36px }",
@@ -119,7 +119,17 @@ export const MulmoPresentationStyleMethods = {
119
119
  const agentInfo = provider2LipSyncAgent[lipSyncProvider];
120
120
  return agentInfo;
121
121
  },
122
- getConcurrency(presentationStyle) {
122
+ /** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
123
+ getImageConcurrency(presentationStyle) {
124
+ const imageConcurrency = presentationStyle.imageParams?.concurrency;
125
+ const movieConcurrency = presentationStyle.movieParams?.concurrency;
126
+ // User-specified concurrency takes precedence.
127
+ // Use the smaller of imageParams/movieParams since they share the same graph.
128
+ if (imageConcurrency !== undefined || movieConcurrency !== undefined) {
129
+ const values = [imageConcurrency, movieConcurrency].filter((v) => v !== undefined);
130
+ return Math.min(...values);
131
+ }
132
+ // Fallback: provider-based auto-detection
123
133
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
124
134
  if (imageAgentInfo.imageParams.provider === "openai") {
125
135
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -131,6 +141,20 @@ export const MulmoPresentationStyleMethods = {
131
141
  }
132
142
  return 4;
133
143
  },
144
+ /** Concurrency for audio/TTS generation graph */
145
+ getAudioConcurrency(presentationStyle) {
146
+ // User-specified concurrency takes precedence
147
+ const userConcurrency = presentationStyle.audioParams?.concurrency;
148
+ if (userConcurrency !== undefined) {
149
+ return userConcurrency;
150
+ }
151
+ // Fallback: provider-based auto-detection
152
+ const hasLimitedConcurrencyProvider = Object.values(presentationStyle.speechParams.speakers).some((speaker) => {
153
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
154
+ return provider2TTSAgent[provider].hasLimitedConcurrency;
155
+ });
156
+ return hasLimitedConcurrencyProvider ? 1 : 8;
157
+ },
134
158
  getHtmlImageAgentInfo(presentationStyle) {
135
159
  const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
136
160
  const defaultConfig = provider2LLMAgent[provider];
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
80
80
  durations: number[];
81
81
  start_image: string | undefined;
82
82
  last_image?: string;
83
+ reference_images_param?: string;
83
84
  price_per_sec: number;
84
85
  }>;
85
86
  };
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
90
91
  keyName: string;
91
92
  modelParams: Record<string, {
92
93
  durations: number[];
94
+ supportsDuration: boolean;
93
95
  supportsLastFrame: boolean;
94
96
  supportsReferenceImages: boolean;
95
97
  supportsPersonGeneration: boolean;