mulmocast 2.6.5 → 2.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/lib/actions/graph_option.d.ts +3 -0
  2. package/lib/actions/graph_option.js +18 -0
  3. package/lib/actions/image_references.js +1 -1
  4. package/lib/actions/images.d.ts +2 -3
  5. package/lib/actions/images.js +4 -17
  6. package/lib/actions/movie.d.ts +3 -0
  7. package/lib/actions/movie.js +38 -5
  8. package/lib/agents/add_bgm_agent.d.ts +10 -0
  9. package/lib/agents/add_bgm_agent.js +26 -4
  10. package/lib/agents/movie_genai_agent.js +1 -1
  11. package/lib/agents/movie_replicate_agent.js +29 -5
  12. package/lib/types/provider2agent.d.ts +2 -0
  13. package/lib/types/provider2agent.js +78 -5
  14. package/lib/types/schema.d.ts +34 -4
  15. package/lib/types/schema.js +9 -1
  16. package/lib/utils/context.d.ts +17 -2
  17. package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
  18. package/lib/utils/image_plugins/html_tailwind.js +56 -5
  19. package/package.json +8 -7
  20. package/scripts/test/fixtures/movie_tone_high.mov +0 -0
  21. package/scripts/test/fixtures/movie_tone_low.mov +0 -0
  22. package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
  23. package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
  24. package/scripts/test/test_audio_mix.json +91 -0
  25. package/scripts/test/test_audio_mix_beat_vol.json +100 -0
  26. package/scripts/test/test_audio_mix_ducking.json +91 -0
  27. package/scripts/test/test_audio_mix_legacy.json +90 -0
  28. package/scripts/test/test_grok.json +57 -0
  29. package/scripts/test/test_image_references.json +74 -0
  30. package/scripts/test/test_kling_v3.json +54 -0
  31. package/scripts/test/test_kling_v3_omni.json +54 -0
  32. package/scripts/test/test_lipsync2.json +48 -52
  33. package/scripts/test/test_lipsync5.json +66 -0
  34. package/scripts/test/test_runway.json +54 -0
  35. package/scripts/test/test_threejs.json +241 -0
  36. package/scripts/test/test_threejs_glb.json +154 -0
  37. package/scripts/test/test_veo31_lite.json +39 -0
@@ -0,0 +1,3 @@
1
+ import type { GraphOptions } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
@@ -0,0 +1,18 @@
1
+ import { TaskManager } from "graphai";
2
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
3
+ import { fileCacheAgentFilter } from "../utils/filters.js";
4
+ import { settings2GraphAIConfig } from "../utils/utils.js";
5
/**
 * Builds the GraphAI options object imported by images.js and image_references.js.
 *
 * @param context - studio context; its `presentationStyle` is handed to
 *   `MulmoPresentationStyleMethods.getConcurrency` to size the TaskManager.
 * @param settings - optional key/value settings, passed together with
 *   `process.env` to `settings2GraphAIConfig`.
 * @returns a promise resolving to `{ agentFilters, taskManager, config }`.
 */
export const graphOption = async (context, settings) => {
    // Node ids that the fileCacheAgentFilter is attached to.
    const cachedNodeIds = ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"];
    const concurrency = MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle);
    return {
        agentFilters: [{ name: "fileCacheAgentFilter", agent: fileCacheAgentFilter, nodeIds: cachedNodeIds }],
        taskManager: new TaskManager(concurrency),
        config: settings2GraphAIConfig(settings, process.env),
    };
};
@@ -1,6 +1,6 @@
1
1
  import { GraphAI, GraphAILogger } from "graphai";
2
2
  import { getReferenceImagePath } from "../utils/file.js";
3
- import { graphOption } from "./images.js";
3
+ import { graphOption } from "./graph_option.js";
4
4
  import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
5
5
  import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
6
6
  import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
@@ -1,4 +1,4 @@
1
- import type { GraphOptions, GraphData } from "graphai";
1
+ import type { GraphData } from "graphai";
2
2
  import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
3
3
  export declare const beat_graph_data: {
4
4
  version: number;
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
463
463
  };
464
464
  };
465
465
  export declare const images_graph_data: GraphData;
466
- export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
466
+ export { graphOption } from "./graph_option.js";
467
467
  type ImageOptions = {
468
468
  imageAgents: Record<string, unknown>;
469
469
  };
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
481
481
  withBackup?: boolean;
482
482
  };
483
483
  }) => Promise<void>;
484
- export {};
@@ -1,6 +1,6 @@
1
1
  import dotenv from "dotenv";
2
2
  import fs from "fs";
3
- import { GraphAI, GraphAILogger, TaskManager } from "graphai";
3
+ import { GraphAI, GraphAILogger } from "graphai";
4
4
  import { AuthenticationError, RateLimitError } from "openai/index.js";
5
5
  import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
12
12
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
13
- import { fileCacheAgentFilter } from "../utils/filters.js";
14
- import { settings2GraphAIConfig } from "../utils/utils.js";
15
13
  import { audioCheckerError } from "../utils/error_cause.js";
16
14
  import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
17
15
  import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
18
16
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
+ import { graphOption } from "./graph_option.js";
19
18
  const vanillaAgents = vanilla.default ?? vanilla;
20
19
  const imageAgents = {
21
20
  imageGenAIAgent,
@@ -432,20 +431,8 @@ export const images_graph_data = {
432
431
  },
433
432
  },
434
433
  };
435
- export const graphOption = async (context, settings) => {
436
- const options = {
437
- agentFilters: [
438
- {
439
- name: "fileCacheAgentFilter",
440
- agent: fileCacheAgentFilter,
441
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
442
- },
443
- ],
444
- taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
445
- config: settings2GraphAIConfig(settings, process.env),
446
- };
447
- return options;
448
- };
434
+ // graphOption moved to graph_option.ts to break circular dependency with image_references.ts
435
+ export { graphOption } from "./graph_option.js";
449
436
  const prepareGenerateImages = async (context) => {
450
437
  const fileName = MulmoStudioContextMethods.getFileName(context);
451
438
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
13
13
  export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
14
14
  export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
15
15
  export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
16
+ export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
17
+ export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
18
+ export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
16
19
  export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
17
20
  export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
18
21
  style: "aspectFit" | "aspectFill";
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
9
9
  // const isMac = process.platform === "darwin";
10
10
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
11
11
  const VIDEO_FPS = 30;
12
+ const DEFAULT_DUCKING_RATIO = 0.3;
12
13
  export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
13
14
  const videoId = `v${inputIndex}`;
14
15
  const videoFilters = [];
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
241
242
  return nextTransition !== null; // Any transition on next beat requires this beat's last frame
242
243
  });
243
244
  };
244
- const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
245
/**
 * Resolves the volume applied to the movie audio of a single beat.
 *
 * The base volume is the beat's own `audioParams.movieVolume`, falling back to
 * the presentation style's `audioParams.movieVolume`, then to 1.0.
 * When `ducking` is configured, the beat has text, and speech is not
 * suppressed, the base volume is multiplied by `ducking.ratio`
 * (or DEFAULT_DUCKING_RATIO when no ratio is given).
 *
 * @param beat - the beat whose movie volume is being resolved.
 * @param context - studio context providing `presentationStyle.audioParams`.
 * @returns the resolved volume multiplier.
 */
export const resolveMovieVolume = (beat, context) => {
    const { audioParams } = context.presentationStyle;
    const volume = beat.audioParams?.movieVolume ?? audioParams.movieVolume ?? 1.0;
    const { ducking } = audioParams;
    // No ducking unless it is configured AND this beat actually carries speech.
    if (!ducking || !beat.text || audioParams.suppressSpeech) {
        return volume;
    }
    return volume * (ducking.ratio ?? DEFAULT_DUCKING_RATIO);
};
255
+ export const isExplicitMixMode = (context) => {
256
+ const audioParams = context.presentationStyle.audioParams;
257
+ const duckingRequested = audioParams.ducking !== undefined;
258
+ const speechSuppressed = audioParams.suppressSpeech === true;
259
+ const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
260
+ const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
261
+ return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
262
+ };
263
+ export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
245
264
  if (audioIdsFromMovieBeats.length > 0) {
246
265
  const mainAudioId = "mainaudio";
247
266
  const compositeAudioId = "composite";
248
267
  const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
249
- FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
250
- ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
268
+ const useExplicitMix = isExplicitMixMode(context);
269
+ if (useExplicitMix) {
270
+ // Explicit mode: normalize=0 + limiter.
271
+ // ttsVolume is applied in addBGMAgent to avoid changing BGM level.
272
+ // Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
273
+ const mixedId = "mixed";
274
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
275
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
276
+ // Limiter as failsafe
277
+ ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
278
+ }
279
+ else {
280
+ // Legacy mode: normalize=1 (current behavior, fully backward compatible)
281
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
282
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
283
+ }
251
284
  return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
252
285
  }
253
286
  return artifactAudioId;
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
420
453
  transitionVideoIds.push(transitionVideoId);
421
454
  }
422
455
  // NOTE: We don't support audio if the speed is not 1.0.
423
- const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
456
+ const movieVolume = resolveMovieVolume(beat, context);
424
457
  if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
425
458
  // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
426
459
  const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
442
475
  }
443
476
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
444
477
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
445
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
478
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
446
479
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
447
480
  const endTime = performance.now();
448
481
  GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
@@ -1,3 +1,13 @@
1
1
  import type { AgentFunctionInfo } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
4
+ useExplicitMix: boolean;
5
+ voiceVolume: number;
6
+ };
7
+ export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
8
+ amixNormalize: string;
9
+ mixedOutputId: string;
10
+ limiterFilter: string | undefined;
11
+ };
2
12
  declare const addBGMAgentInfo: AgentFunctionInfo;
3
13
  export default addBGMAgentInfo;
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
3
3
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
4
4
  import { isFile } from "../utils/file.js";
5
5
  import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
6
/**
 * Derives the voice-track mix parameters used by addBGMAgent.
 *
 * @param audioParams - presentation-style audio params; reads `ttsVolume` and `audioVolume`.
 * @returns `useExplicitMix` (true when `ttsVolume` is not undefined) and
 *   `voiceVolume` (`audioVolume` multiplied by `ttsVolume`, which counts as 1.0 when absent).
 */
export const resolveAddBgmMixParams = (audioParams) => {
    const { ttsVolume, audioVolume } = audioParams;
    const explicit = ttsVolume !== undefined;
    const gain = ttsVolume ?? 1.0;
    return { useExplicitMix: explicit, voiceVolume: audioVolume * gain };
};
14
/**
 * Picks the ffmpeg filter fragments addBGMAgent uses for the music/voice mix.
 *
 * @param useExplicitMix - when truthy, amix gets `:normalize=0` and an
 *   `alimiter` stage is inserted between `[mixed]` and `[mixed_limited]`.
 * @returns `amixNormalize` (suffix appended to the amix filter),
 *   `mixedOutputId` (label the following atrim stage reads from) and
 *   `limiterFilter` (the limiter filter string, or undefined when not used).
 */
export const resolveAddBgmFilterConfig = (useExplicitMix) => {
    if (useExplicitMix) {
        return {
            amixNormalize: ":normalize=0",
            mixedOutputId: "mixed_limited",
            limiterFilter: "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]",
        };
    }
    return {
        amixNormalize: "",
        mixedOutputId: "mixed",
        limiterFilter: undefined,
    };
};
6
22
  const addBGMAgent = async ({ namedInputs, params, }) => {
7
23
  const { voiceFile, outputFile, context } = namedInputs;
8
24
  const { musicFile } = params;
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
24
40
  const ffmpegContext = FfmpegContextInit();
25
41
  const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
26
42
  const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
27
- ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
28
- ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
29
- ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
30
- ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
43
+ const audioParams = context.presentationStyle.audioParams;
44
+ const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
45
+ ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
46
+ ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
47
+ const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
48
+ ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
49
+ if (limiterFilter) {
50
+ ffmpegContext.filterComplex.push(limiterFilter);
51
+ }
52
+ ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
31
53
  ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
32
54
  try {
33
55
  await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
100
100
  model,
101
101
  prompt,
102
102
  config: {
103
- durationSeconds: capabilities?.supportsPersonGeneration === false ? undefined : duration,
103
+ durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
104
104
  aspectRatio,
105
105
  personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
106
106
  },
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
5
  import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
6
- async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration) {
6
+ function replicate_get_videoUrl(output) {
7
+ if (typeof output === "string")
8
+ return output;
9
+ if (output && typeof output === "object" && "url" in output)
10
+ return output.url();
11
+ return undefined;
12
+ }
13
+ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
7
14
  const replicate = new Replicate({
8
15
  auth: apiKey,
9
16
  });
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
37
44
  input.image = base64Image;
38
45
  }
39
46
  }
47
+ // Add reference images if provided and model supports it
48
+ const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
49
+ if (referenceImages && referenceImages.length > 0) {
50
+ if (!referenceImagesParam) {
51
+ GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
52
+ }
53
+ else if (imagePath) {
54
+ GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
55
+ }
56
+ else {
57
+ input[referenceImagesParam] = referenceImages.map((ref) => {
58
+ const buffer = readFileSync(ref.imagePath);
59
+ return `data:image/png;base64,${buffer.toString("base64")}`;
60
+ });
61
+ }
62
+ }
40
63
  // Add last frame image if provided and model supports it
41
64
  if (lastFrameImagePath) {
42
65
  const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
57
80
  try {
58
81
  const output = await replicate.run(model, { input });
59
82
  // Download the generated video
60
- if (output && typeof output === "object" && "url" in output) {
61
- const videoUrl = output.url();
83
+ // Some models return a FileOutput object with a url() method; others return a plain string URL.
84
+ const videoUrl = replicate_get_videoUrl(output);
85
+ if (videoUrl) {
62
86
  const videoResponse = await fetch(videoUrl);
63
87
  if (!videoResponse.ok) {
64
88
  throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
89
113
  return "9:16";
90
114
  };
91
115
  export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
92
- const { prompt, imagePath, lastFrameImagePath } = namedInputs;
116
+ const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
93
117
  const aspectRatio = getAspectRatio(params.canvasSize);
94
118
  const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
95
119
  if (!provider2MovieAgent.replicate.modelParams[model]) {
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
110
134
  });
111
135
  }
112
136
  try {
113
- const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
137
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
114
138
  if (buffer) {
115
139
  return { buffer };
116
140
  }
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
80
80
  durations: number[];
81
81
  start_image: string | undefined;
82
82
  last_image?: string;
83
+ reference_images_param?: string;
83
84
  price_per_sec: number;
84
85
  }>;
85
86
  };
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
90
91
  keyName: string;
91
92
  modelParams: Record<string, {
92
93
  durations: number[];
94
+ supportsDuration: boolean;
93
95
  supportsLastFrame: boolean;
94
96
  supportsReferenceImages: boolean;
95
97
  supportsPersonGeneration: boolean;
@@ -92,6 +92,9 @@ export const provider2MovieAgent = {
92
92
  "kwaivgi/kling-v2.1-master",
93
93
  "google/veo-2",
94
94
  "google/veo-3",
95
+ "google/veo-3.1",
96
+ "google/veo-3.1-fast",
97
+ "google/veo-3.1-lite",
95
98
  "google/veo-3-fast",
96
99
  "minimax/video-01",
97
100
  "minimax/hailuo-02",
@@ -99,6 +102,11 @@ export const provider2MovieAgent = {
99
102
  "pixverse/pixverse-v4.5",
100
103
  "wan-video/wan-2.2-i2v-fast",
101
104
  "wan-video/wan-2.2-t2v-fast",
105
+ "xai/grok-imagine-video",
106
+ "xai/grok-imagine-r2v",
107
+ "runwayml/gen-4.5",
108
+ "kwaivgi/kling-v3-omni-video",
109
+ "kwaivgi/kling-v3-video",
102
110
  ],
103
111
  modelParams: {
104
112
  "bytedance/seedance-1-lite": {
@@ -138,6 +146,25 @@ export const provider2MovieAgent = {
138
146
  start_image: "image",
139
147
  price_per_sec: 0.75,
140
148
  },
149
+ "google/veo-3.1": {
150
+ durations: [4, 6, 8],
151
+ start_image: "image",
152
+ last_image: "last_frame_image",
153
+ reference_images_param: "reference_images",
154
+ price_per_sec: 0.75,
155
+ },
156
+ "google/veo-3.1-fast": {
157
+ durations: [4, 6, 8],
158
+ start_image: "image",
159
+ last_image: "last_frame_image",
160
+ price_per_sec: 0.4,
161
+ },
162
+ "google/veo-3.1-lite": {
163
+ durations: [4, 6, 8],
164
+ start_image: "image",
165
+ last_image: "last_frame",
166
+ price_per_sec: 0.05,
167
+ },
141
168
  "google/veo-3-fast": {
142
169
  durations: [8],
143
170
  start_image: "image",
@@ -175,28 +202,68 @@ export const provider2MovieAgent = {
175
202
  start_image: undefined,
176
203
  price_per_sec: 0.012,
177
204
  },
205
+ "xai/grok-imagine-video": {
206
+ durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
207
+ start_image: "image",
208
+ price_per_sec: 0.08,
209
+ },
210
+ "xai/grok-imagine-r2v": {
211
+ durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
212
+ start_image: undefined,
213
+ reference_images_param: "reference_images",
214
+ price_per_sec: 0.08,
215
+ },
216
+ "runwayml/gen-4.5": {
217
+ durations: [5, 10],
218
+ start_image: "image",
219
+ price_per_sec: 0.25,
220
+ },
221
+ "kwaivgi/kling-v3-omni-video": {
222
+ durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
223
+ start_image: "start_image",
224
+ last_image: "end_image",
225
+ reference_images_param: "reference_images",
226
+ price_per_sec: 0.3,
227
+ },
228
+ "kwaivgi/kling-v3-video": {
229
+ durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
230
+ start_image: "start_image",
231
+ last_image: "end_image",
232
+ reference_images_param: "reference_images",
233
+ price_per_sec: 0.3,
234
+ },
178
235
  },
179
236
  },
180
237
  google: {
181
238
  agentName: "movieGenAIAgent",
182
239
  defaultModel: "veo-2.0-generate-001",
183
- models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
240
+ models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-lite-generate-preview"],
184
241
  keyName: "GEMINI_API_KEY",
185
242
  modelParams: {
243
+ "veo-3.1-lite-generate-preview": {
244
+ durations: [4, 6, 8],
245
+ supportsDuration: true,
246
+ supportsLastFrame: true,
247
+ supportsReferenceImages: false,
248
+ supportsPersonGeneration: false,
249
+ },
186
250
  "veo-3.1-generate-preview": {
187
251
  durations: [4, 6, 8],
252
+ supportsDuration: true,
188
253
  supportsLastFrame: true,
189
254
  supportsReferenceImages: true,
190
255
  supportsPersonGeneration: false,
191
256
  },
192
257
  "veo-3.0-generate-001": {
193
- durations: [4, 6, 8],
258
+ durations: [8],
259
+ supportsDuration: false, // Veo 3.0 always generates 8s
194
260
  supportsLastFrame: false,
195
261
  supportsReferenceImages: false,
196
262
  supportsPersonGeneration: false,
197
263
  },
198
264
  "veo-2.0-generate-001": {
199
- durations: [5, 6, 7, 8],
265
+ durations: [5, 6, 8],
266
+ supportsDuration: true,
200
267
  supportsLastFrame: false, // Vertex AI only
201
268
  supportsReferenceImages: false,
202
269
  supportsPersonGeneration: true,
@@ -229,7 +296,7 @@ export const provider2LipSyncAgent = {
229
296
  agentName: "lipSyncReplicateAgent",
230
297
  defaultModel: "bytedance/omni-human",
231
298
  keyName: "REPLICATE_API_TOKEN",
232
- models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
299
+ models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human", "pixverse/lipsync"],
233
300
  modelParams: {
234
301
  "bytedance/latentsync": {
235
302
  identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
@@ -247,14 +314,20 @@ export const provider2LipSyncAgent = {
247
314
  audio: "audio",
248
315
  price_per_sec: 0.14,
249
316
  },
317
+ "pixverse/lipsync": {
318
+ identifier: "pixverse/lipsync:3ca6d73f4fb9e1d77a4b6e14f8998ee18926e4dc462838e31fa2bb5e662c1e2c",
319
+ video: "video",
320
+ audio: "audio",
321
+ },
250
322
  /* NOTE: This model does not work with large base64 urls.
251
323
  "sync/lipsync-2": {
252
324
  video: "video",
253
325
  audio: "audio",
254
326
  },
255
327
  */
256
- /* NOTE: This model does not work well for some unknown reason.
328
+ /* NOTE: This model does not work with base64 data URIs (error 1201).
257
329
  "kwaivgi/kling-lip-sync": {
330
+ identifier: "kwaivgi/kling-lip-sync:8311467f07043d4b3feb44584d2586bfa2fc70203eca612ed26f84d0b55df3ce",
258
331
  video: "video_url",
259
332
  audio: "audio_file",
260
333
  },
@@ -3370,7 +3370,7 @@ export declare const mulmoSlideParamsSchema: z.ZodObject<{
3370
3370
  }, z.core.$strict>;
3371
3371
  export declare const beatAudioParamsSchema: z.ZodObject<{
3372
3372
  padding: z.ZodOptional<z.ZodNumber>;
3373
- movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
3373
+ movieVolume: z.ZodOptional<z.ZodNumber>;
3374
3374
  }, z.core.$strict>;
3375
3375
  export declare const mulmoHtmlImageParamsSchema: z.ZodObject<{
3376
3376
  model: z.ZodOptional<z.ZodString>;
@@ -3393,6 +3393,11 @@ export declare const audioParamsSchema: z.ZodObject<{
3393
3393
  bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
3394
3394
  audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
3395
3395
  suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
3396
+ movieVolume: z.ZodOptional<z.ZodNumber>;
3397
+ ttsVolume: z.ZodOptional<z.ZodNumber>;
3398
+ ducking: z.ZodOptional<z.ZodObject<{
3399
+ ratio: z.ZodOptional<z.ZodNumber>;
3400
+ }, z.core.$strip>>;
3396
3401
  }, z.core.$strict>;
3397
3402
  export declare const htmlPromptParamsSchema: z.ZodObject<{
3398
3403
  systemPrompt: z.ZodDefault<z.ZodOptional<z.ZodString>>;
@@ -6336,7 +6341,7 @@ export declare const mulmoBeatSchema: z.ZodObject<{
6336
6341
  }, z.core.$strict>>;
6337
6342
  audioParams: z.ZodOptional<z.ZodObject<{
6338
6343
  padding: z.ZodOptional<z.ZodNumber>;
6339
- movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
6344
+ movieVolume: z.ZodOptional<z.ZodNumber>;
6340
6345
  }, z.core.$strict>>;
6341
6346
  movieParams: z.ZodOptional<z.ZodObject<{
6342
6347
  provider: z.ZodOptional<z.ZodEnum<{
@@ -7181,6 +7186,11 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
7181
7186
  bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
7182
7187
  audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
7183
7188
  suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
7189
+ movieVolume: z.ZodOptional<z.ZodNumber>;
7190
+ ttsVolume: z.ZodOptional<z.ZodNumber>;
7191
+ ducking: z.ZodOptional<z.ZodObject<{
7192
+ ratio: z.ZodOptional<z.ZodNumber>;
7193
+ }, z.core.$strip>>;
7184
7194
  }, z.core.$strict>>;
7185
7195
  }, z.core.$strip>;
7186
7196
  export declare const mulmoReferenceSchema: z.ZodObject<{
@@ -7677,6 +7687,11 @@ export declare const mulmoScriptSchema: z.ZodObject<{
7677
7687
  bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
7678
7688
  audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
7679
7689
  suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
7690
+ movieVolume: z.ZodOptional<z.ZodNumber>;
7691
+ ttsVolume: z.ZodOptional<z.ZodNumber>;
7692
+ ducking: z.ZodOptional<z.ZodObject<{
7693
+ ratio: z.ZodOptional<z.ZodNumber>;
7694
+ }, z.core.$strip>>;
7680
7695
  }, z.core.$strict>>;
7681
7696
  title: z.ZodOptional<z.ZodString>;
7682
7697
  description: z.ZodOptional<z.ZodString>;
@@ -10345,7 +10360,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
10345
10360
  }, z.core.$strict>>;
10346
10361
  audioParams: z.ZodOptional<z.ZodObject<{
10347
10362
  padding: z.ZodOptional<z.ZodNumber>;
10348
- movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
10363
+ movieVolume: z.ZodOptional<z.ZodNumber>;
10349
10364
  }, z.core.$strict>>;
10350
10365
  movieParams: z.ZodOptional<z.ZodObject<{
10351
10366
  provider: z.ZodOptional<z.ZodEnum<{
@@ -11265,6 +11280,11 @@ export declare const mulmoStudioSchema: z.ZodObject<{
11265
11280
  bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
11266
11281
  audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
11267
11282
  suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
11283
+ movieVolume: z.ZodOptional<z.ZodNumber>;
11284
+ ttsVolume: z.ZodOptional<z.ZodNumber>;
11285
+ ducking: z.ZodOptional<z.ZodObject<{
11286
+ ratio: z.ZodOptional<z.ZodNumber>;
11287
+ }, z.core.$strip>>;
11268
11288
  }, z.core.$strict>>;
11269
11289
  title: z.ZodOptional<z.ZodString>;
11270
11290
  description: z.ZodOptional<z.ZodString>;
@@ -13933,7 +13953,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
13933
13953
  }, z.core.$strict>>;
13934
13954
  audioParams: z.ZodOptional<z.ZodObject<{
13935
13955
  padding: z.ZodOptional<z.ZodNumber>;
13936
- movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
13956
+ movieVolume: z.ZodOptional<z.ZodNumber>;
13937
13957
  }, z.core.$strict>>;
13938
13958
  movieParams: z.ZodOptional<z.ZodObject<{
13939
13959
  provider: z.ZodOptional<z.ZodEnum<{
@@ -14789,6 +14809,11 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
14789
14809
  bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
14790
14810
  audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
14791
14811
  suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
14812
+ movieVolume: z.ZodOptional<z.ZodNumber>;
14813
+ ttsVolume: z.ZodOptional<z.ZodNumber>;
14814
+ ducking: z.ZodOptional<z.ZodObject<{
14815
+ ratio: z.ZodOptional<z.ZodNumber>;
14816
+ }, z.core.$strip>>;
14792
14817
  }, z.core.$strict>>;
14793
14818
  }, z.core.$strip>>;
14794
14819
  }, z.core.$strict>;
@@ -15279,6 +15304,11 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
15279
15304
  bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
15280
15305
  audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
15281
15306
  suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
15307
+ movieVolume: z.ZodOptional<z.ZodNumber>;
15308
+ ttsVolume: z.ZodOptional<z.ZodNumber>;
15309
+ ducking: z.ZodOptional<z.ZodObject<{
15310
+ ratio: z.ZodOptional<z.ZodNumber>;
15311
+ }, z.core.$strip>>;
15282
15312
  }, z.core.$strict>>;
15283
15313
  }, z.core.$strip>>;
15284
15314
  filename: z.ZodString;
@@ -409,7 +409,7 @@ export const mulmoSlideParamsSchema = z
409
409
  export const beatAudioParamsSchema = z
410
410
  .object({
411
411
  padding: z.number().optional().describe("Padding between beats"), // seconds
412
- movieVolume: z.number().optional().default(1.0).describe("Audio volume of the imported or generated movie"),
412
+ movieVolume: z.number().min(0).max(1).optional().describe("Audio volume of the imported or generated movie"),
413
413
  })
414
414
  .strict();
415
415
  export const mulmoHtmlImageParamsSchema = z
@@ -428,6 +428,14 @@ export const audioParamsSchema = z
428
428
  bgmVolume: z.number().optional().default(0.2).describe("Volume of the background music"),
429
429
  audioVolume: z.number().optional().default(1.0).describe("Volume of the audio"),
430
430
  suppressSpeech: z.boolean().optional().default(false).describe("Suppress speech generation"),
431
+ movieVolume: z.number().min(0).max(1).optional().describe("Default movie audio volume for all beats"),
432
+ ttsVolume: z.number().min(0).max(2).optional().describe("TTS narration volume before mixing with BGM/movie audio"),
433
+ ducking: z
434
+ .object({
435
+ ratio: z.number().min(0).max(1).optional().describe("Movie volume ratio during TTS beats (default 0.3)"),
436
+ })
437
+ .optional()
438
+ .describe("Auto-reduce movie audio when TTS is playing"),
431
439
  })
432
440
  .strict();
433
441
  export const htmlPromptParamsSchema = z