mulmocast 2.6.4 → 2.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/lib/actions/graph_option.d.ts +3 -0
  2. package/lib/actions/graph_option.js +18 -0
  3. package/lib/actions/image_references.d.ts +1 -0
  4. package/lib/actions/image_references.js +50 -12
  5. package/lib/actions/images.d.ts +2 -3
  6. package/lib/actions/images.js +4 -17
  7. package/lib/actions/movie.d.ts +3 -0
  8. package/lib/actions/movie.js +38 -5
  9. package/lib/agents/add_bgm_agent.d.ts +10 -0
  10. package/lib/agents/add_bgm_agent.js +26 -4
  11. package/lib/agents/movie_genai_agent.js +1 -1
  12. package/lib/agents/movie_replicate_agent.js +29 -5
  13. package/lib/types/provider2agent.d.ts +2 -0
  14. package/lib/types/provider2agent.js +78 -5
  15. package/lib/types/schema.d.ts +166 -4
  16. package/lib/types/schema.js +11 -1
  17. package/lib/utils/context.d.ts +72 -2
  18. package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
  19. package/lib/utils/image_plugins/html_tailwind.js +56 -5
  20. package/package.json +13 -12
  21. package/scripts/test/fixtures/movie_tone_high.mov +0 -0
  22. package/scripts/test/fixtures/movie_tone_low.mov +0 -0
  23. package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
  24. package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
  25. package/scripts/test/test_audio_mix.json +91 -0
  26. package/scripts/test/test_audio_mix_beat_vol.json +100 -0
  27. package/scripts/test/test_audio_mix_ducking.json +91 -0
  28. package/scripts/test/test_audio_mix_legacy.json +90 -0
  29. package/scripts/test/test_grok.json +57 -0
  30. package/scripts/test/test_image_prompt_reference.json +55 -0
  31. package/scripts/test/test_image_references.json +74 -0
  32. package/scripts/test/test_kling_v3.json +54 -0
  33. package/scripts/test/test_kling_v3_omni.json +54 -0
  34. package/scripts/test/test_lipsync2.json +48 -52
  35. package/scripts/test/test_lipsync5.json +66 -0
  36. package/scripts/test/test_runway.json +54 -0
  37. package/scripts/test/test_threejs.json +241 -0
  38. package/scripts/test/test_threejs_glb.json +154 -0
  39. package/scripts/test/test_veo31_lite.json +39 -0
@@ -0,0 +1,3 @@
1
+ import type { GraphOptions } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
@@ -0,0 +1,18 @@
1
+ import { TaskManager } from "graphai";
2
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
3
+ import { fileCacheAgentFilter } from "../utils/filters.js";
4
+ import { settings2GraphAIConfig } from "../utils/utils.js";
5
+ export const graphOption = async (context, settings) => {
6
+ const options = {
7
+ agentFilters: [
8
+ {
9
+ name: "fileCacheAgentFilter",
10
+ agent: fileCacheAgentFilter,
11
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
12
+ },
13
+ ],
14
+ taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
15
+ config: settings2GraphAIConfig(settings, process.env),
16
+ };
17
+ return options;
18
+ };
@@ -4,6 +4,7 @@ export declare const generateReferenceImage: (inputs: {
4
4
  key: string;
5
5
  index: number;
6
6
  image: MulmoImagePromptMedia;
7
+ referenceImagePath?: string;
7
8
  force?: boolean;
8
9
  }) => Promise<string>;
9
10
  export type MediaRefs = {
@@ -1,18 +1,19 @@
1
1
  import { GraphAI, GraphAILogger } from "graphai";
2
2
  import { getReferenceImagePath } from "../utils/file.js";
3
- import { graphOption } from "./images.js";
3
+ import { graphOption } from "./graph_option.js";
4
4
  import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
5
5
  import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
6
6
  import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
7
7
  // public api
8
8
  // Application may call this function directly to generate reference image.
9
9
  export const generateReferenceImage = async (inputs) => {
10
- const { context, key, index, image, force } = inputs;
10
+ const { context, key, index, image, referenceImagePath, force } = inputs;
11
11
  const imagePath = getReferenceImagePath(context, key, "png");
12
12
  // generate image
13
13
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
14
14
  const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
15
15
  GraphAILogger.info(`Generating reference image for ${key}: ${prompt}`);
16
+ const referenceImages = referenceImagePath ? [referenceImagePath] : undefined;
16
17
  const image_graph_data = {
17
18
  version: 0.5,
18
19
  nodes: {
@@ -22,6 +23,7 @@ export const generateReferenceImage = async (inputs) => {
22
23
  inputs: {
23
24
  media: "image",
24
25
  prompt,
26
+ referenceImages,
25
27
  cache: {
26
28
  force: [context.force, force ?? false],
27
29
  file: imagePath,
@@ -58,12 +60,14 @@ export const getMediaRefs = async (context) => {
58
60
  }
59
61
  const imageRefs = {};
60
62
  const movieRefs = {};
63
+ // Stage 1: resolve non-referencing entries (image, imagePrompt without referenceImageName, movie)
61
64
  await Promise.all(Object.keys(images)
62
65
  .sort()
63
66
  .map(async (key, index) => {
64
67
  const image = images[key];
65
- if (image.type === "imagePrompt") {
66
- imageRefs[key] = await generateReferenceImage({ context, key, index, image, force: false });
68
+ if (image.type === "imagePrompt" && !image.referenceImageName) {
69
+ const refPath = image.referenceImage ? await MulmoMediaSourceMethods.imageReference(image.referenceImage, context, key) : undefined;
70
+ imageRefs[key] = await generateReferenceImage({ context, key, index, image, referenceImagePath: refPath, force: false });
67
71
  }
68
72
  else if (image.type === "image") {
69
73
  imageRefs[key] = await MulmoMediaSourceMethods.imageReference(image.source, context, key);
@@ -72,10 +76,23 @@ export const getMediaRefs = async (context) => {
72
76
  movieRefs[key] = await resolveMovieReference(image, context, key);
73
77
  }
74
78
  }));
79
+ // Stage 2: resolve imagePrompt with referenceImageName (depends on Stage 1 results)
80
+ await Promise.all(Object.keys(images)
81
+ .sort()
82
+ .map(async (key, index) => {
83
+ const image = images[key];
84
+ if (image.type === "imagePrompt" && image.referenceImageName) {
85
+ const refPath = imageRefs[image.referenceImageName];
86
+ if (!refPath) {
87
+ GraphAILogger.warn(`imagePrompt "${key}": referenceImageName "${image.referenceImageName}" not found in imageRefs — generating without reference`);
88
+ }
89
+ imageRefs[key] = await generateReferenceImage({ context, key, index, image, referenceImagePath: refPath, force: false });
90
+ }
91
+ }));
75
92
  return { imageRefs, movieRefs };
76
93
  };
77
- const resolveMovieReference = async (movie, context, key) => {
78
- return MulmoMediaSourceMethods.imageReference(movie.source, context, key);
94
+ const resolveMovieReference = async (media, context, key) => {
95
+ return MulmoMediaSourceMethods.imageReference(media.source, context, key);
79
96
  };
80
97
  const generateReferenceMovie = async (inputs) => {
81
98
  const { context, key, index, moviePrompt, imagePath } = inputs;
@@ -124,17 +141,19 @@ const generateReferenceMovie = async (inputs) => {
124
141
  const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) => {
125
142
  const localImageRefs = {};
126
143
  const localMovieRefs = {};
127
- // Stage 1: image, imagePrompt, movie (parallel)
144
+ // Stage 1: image, imagePrompt (without referenceImageName), movie (parallel)
128
145
  await Promise.all(Object.keys(images)
129
146
  .sort()
130
147
  .map(async (key, i) => {
131
148
  const entry = images[key];
132
- if (entry.type === "imagePrompt") {
149
+ if (entry.type === "imagePrompt" && !entry.referenceImageName) {
150
+ const refPath = entry.referenceImage ? await MulmoMediaSourceMethods.imageReference(entry.referenceImage, context, key) : undefined;
133
151
  localImageRefs[key] = await generateReferenceImage({
134
152
  context,
135
153
  key,
136
154
  index: beatIndex * 100 + i,
137
155
  image: entry,
156
+ referenceImagePath: refPath,
138
157
  });
139
158
  }
140
159
  else if (entry.type === "image") {
@@ -144,20 +163,39 @@ const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) =>
144
163
  localMovieRefs[key] = await resolveMovieReference(entry, context, key);
145
164
  }
146
165
  }));
147
- // Stage 2: moviePrompt (imageName references imageRefs only)
166
+ // Stage 2: imagePrompt with referenceImageName (depends on Stage 1)
167
+ const combinedImageRefsForImagePrompt = { ...globalImageRefs, ...localImageRefs };
168
+ await Promise.all(Object.keys(images)
169
+ .sort()
170
+ .map(async (key, i) => {
171
+ const entry = images[key];
172
+ if (entry.type === "imagePrompt" && entry.referenceImageName) {
173
+ const refPath = combinedImageRefsForImagePrompt[entry.referenceImageName];
174
+ if (!refPath) {
175
+ GraphAILogger.warn(`imagePrompt "${key}": referenceImageName "${entry.referenceImageName}" not found — generating without reference`);
176
+ }
177
+ localImageRefs[key] = await generateReferenceImage({
178
+ context,
179
+ key,
180
+ index: beatIndex * 100 + i,
181
+ image: entry,
182
+ referenceImagePath: refPath,
183
+ });
184
+ }
185
+ }));
186
+ // Stage 3: moviePrompt (imageName references imageRefs only)
148
187
  const combinedImageRefs = { ...globalImageRefs, ...localImageRefs };
149
188
  await Promise.all(Object.keys(images)
150
189
  .sort()
151
190
  .map(async (key, i) => {
152
191
  const entry = images[key];
153
192
  if (entry.type === "moviePrompt") {
154
- const mp = entry;
155
- const refImagePath = mp.imageName ? combinedImageRefs[mp.imageName] : undefined;
193
+ const refImagePath = entry.imageName ? combinedImageRefs[entry.imageName] : undefined;
156
194
  localMovieRefs[key] = await generateReferenceMovie({
157
195
  context,
158
196
  key,
159
197
  index: beatIndex * 100 + i,
160
- moviePrompt: mp,
198
+ moviePrompt: entry,
161
199
  imagePath: refImagePath,
162
200
  });
163
201
  }
@@ -1,4 +1,4 @@
1
- import type { GraphOptions, GraphData } from "graphai";
1
+ import type { GraphData } from "graphai";
2
2
  import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
3
3
  export declare const beat_graph_data: {
4
4
  version: number;
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
463
463
  };
464
464
  };
465
465
  export declare const images_graph_data: GraphData;
466
- export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
466
+ export { graphOption } from "./graph_option.js";
467
467
  type ImageOptions = {
468
468
  imageAgents: Record<string, unknown>;
469
469
  };
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
481
481
  withBackup?: boolean;
482
482
  };
483
483
  }) => Promise<void>;
484
- export {};
@@ -1,6 +1,6 @@
1
1
  import dotenv from "dotenv";
2
2
  import fs from "fs";
3
- import { GraphAI, GraphAILogger, TaskManager } from "graphai";
3
+ import { GraphAI, GraphAILogger } from "graphai";
4
4
  import { AuthenticationError, RateLimitError } from "openai/index.js";
5
5
  import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
12
12
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
13
- import { fileCacheAgentFilter } from "../utils/filters.js";
14
- import { settings2GraphAIConfig } from "../utils/utils.js";
15
13
  import { audioCheckerError } from "../utils/error_cause.js";
16
14
  import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
17
15
  import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
18
16
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
+ import { graphOption } from "./graph_option.js";
19
18
  const vanillaAgents = vanilla.default ?? vanilla;
20
19
  const imageAgents = {
21
20
  imageGenAIAgent,
@@ -432,20 +431,8 @@ export const images_graph_data = {
432
431
  },
433
432
  },
434
433
  };
435
- export const graphOption = async (context, settings) => {
436
- const options = {
437
- agentFilters: [
438
- {
439
- name: "fileCacheAgentFilter",
440
- agent: fileCacheAgentFilter,
441
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
442
- },
443
- ],
444
- taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
445
- config: settings2GraphAIConfig(settings, process.env),
446
- };
447
- return options;
448
- };
434
+ // graphOption moved to graph_option.ts to break circular dependency with image_references.ts
435
+ export { graphOption } from "./graph_option.js";
449
436
  const prepareGenerateImages = async (context) => {
450
437
  const fileName = MulmoStudioContextMethods.getFileName(context);
451
438
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
13
13
  export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
14
14
  export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
15
15
  export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
16
+ export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
17
+ export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
18
+ export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
16
19
  export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
17
20
  export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
18
21
  style: "aspectFit" | "aspectFill";
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
9
9
  // const isMac = process.platform === "darwin";
10
10
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
11
11
  const VIDEO_FPS = 30;
12
+ const DEFAULT_DUCKING_RATIO = 0.3;
12
13
  export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
13
14
  const videoId = `v${inputIndex}`;
14
15
  const videoFilters = [];
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
241
242
  return nextTransition !== null; // Any transition on next beat requires this beat's last frame
242
243
  });
243
244
  };
244
- const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
245
+ export const resolveMovieVolume = (beat, context) => {
246
+ const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
247
+ const ducking = context.presentationStyle.audioParams.ducking;
248
+ const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
249
+ if (ducking && hasSpeech) {
250
+ const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
251
+ return baseMovieVolume * ratio;
252
+ }
253
+ return baseMovieVolume;
254
+ };
255
+ export const isExplicitMixMode = (context) => {
256
+ const audioParams = context.presentationStyle.audioParams;
257
+ const duckingRequested = audioParams.ducking !== undefined;
258
+ const speechSuppressed = audioParams.suppressSpeech === true;
259
+ const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
260
+ const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
261
+ return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
262
+ };
263
+ export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
245
264
  if (audioIdsFromMovieBeats.length > 0) {
246
265
  const mainAudioId = "mainaudio";
247
266
  const compositeAudioId = "composite";
248
267
  const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
249
- FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
250
- ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
268
+ const useExplicitMix = isExplicitMixMode(context);
269
+ if (useExplicitMix) {
270
+ // Explicit mode: normalize=0 + limiter.
271
+ // ttsVolume is applied in addBGMAgent to avoid changing BGM level.
272
+ // Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
273
+ const mixedId = "mixed";
274
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
275
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
276
+ // Limiter as failsafe
277
+ ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
278
+ }
279
+ else {
280
+ // Legacy mode: normalize=1 (current behavior, fully backward compatible)
281
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
282
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
283
+ }
251
284
  return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
252
285
  }
253
286
  return artifactAudioId;
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
420
453
  transitionVideoIds.push(transitionVideoId);
421
454
  }
422
455
  // NOTE: We don't support audio if the speed is not 1.0.
423
- const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
456
+ const movieVolume = resolveMovieVolume(beat, context);
424
457
  if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
425
458
  // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
426
459
  const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
442
475
  }
443
476
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
444
477
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
445
- const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
478
+ const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
446
479
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
447
480
  const endTime = performance.now();
448
481
  GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
@@ -1,3 +1,13 @@
1
1
  import type { AgentFunctionInfo } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
4
+ useExplicitMix: boolean;
5
+ voiceVolume: number;
6
+ };
7
+ export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
8
+ amixNormalize: string;
9
+ mixedOutputId: string;
10
+ limiterFilter: string | undefined;
11
+ };
2
12
  declare const addBGMAgentInfo: AgentFunctionInfo;
3
13
  export default addBGMAgentInfo;
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
3
3
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
4
4
  import { isFile } from "../utils/file.js";
5
5
  import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
6
+ export const resolveAddBgmMixParams = (audioParams) => {
7
+ const useExplicitMix = audioParams.ttsVolume !== undefined;
8
+ const ttsVolume = audioParams.ttsVolume ?? 1.0;
9
+ return {
10
+ useExplicitMix,
11
+ voiceVolume: audioParams.audioVolume * ttsVolume,
12
+ };
13
+ };
14
+ export const resolveAddBgmFilterConfig = (useExplicitMix) => {
15
+ const amixNormalize = useExplicitMix ? ":normalize=0" : "";
16
+ return {
17
+ amixNormalize,
18
+ mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
19
+ limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
20
+ };
21
+ };
6
22
  const addBGMAgent = async ({ namedInputs, params, }) => {
7
23
  const { voiceFile, outputFile, context } = namedInputs;
8
24
  const { musicFile } = params;
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
24
40
  const ffmpegContext = FfmpegContextInit();
25
41
  const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
26
42
  const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
27
- ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
28
- ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
29
- ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
30
- ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
43
+ const audioParams = context.presentationStyle.audioParams;
44
+ const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
45
+ ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
46
+ ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
47
+ const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
48
+ ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
49
+ if (limiterFilter) {
50
+ ffmpegContext.filterComplex.push(limiterFilter);
51
+ }
52
+ ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
31
53
  ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
32
54
  try {
33
55
  await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
100
100
  model,
101
101
  prompt,
102
102
  config: {
103
- durationSeconds: capabilities?.supportsPersonGeneration === false ? undefined : duration,
103
+ durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
104
104
  aspectRatio,
105
105
  personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
106
106
  },
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
5
  import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
6
- async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration) {
6
+ function replicate_get_videoUrl(output) {
7
+ if (typeof output === "string")
8
+ return output;
9
+ if (output && typeof output === "object" && "url" in output)
10
+ return output.url();
11
+ return undefined;
12
+ }
13
+ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
7
14
  const replicate = new Replicate({
8
15
  auth: apiKey,
9
16
  });
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
37
44
  input.image = base64Image;
38
45
  }
39
46
  }
47
+ // Add reference images if provided and model supports it
48
+ const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
49
+ if (referenceImages && referenceImages.length > 0) {
50
+ if (!referenceImagesParam) {
51
+ GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
52
+ }
53
+ else if (imagePath) {
54
+ GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
55
+ }
56
+ else {
57
+ input[referenceImagesParam] = referenceImages.map((ref) => {
58
+ const buffer = readFileSync(ref.imagePath);
59
+ return `data:image/png;base64,${buffer.toString("base64")}`;
60
+ });
61
+ }
62
+ }
40
63
  // Add last frame image if provided and model supports it
41
64
  if (lastFrameImagePath) {
42
65
  const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
57
80
  try {
58
81
  const output = await replicate.run(model, { input });
59
82
  // Download the generated video
60
- if (output && typeof output === "object" && "url" in output) {
61
- const videoUrl = output.url();
83
+ // Some models return a FileOutput object with a url() method; others return a plain string URL.
84
+ const videoUrl = replicate_get_videoUrl(output);
85
+ if (videoUrl) {
62
86
  const videoResponse = await fetch(videoUrl);
63
87
  if (!videoResponse.ok) {
64
88
  throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
89
113
  return "9:16";
90
114
  };
91
115
  export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
92
- const { prompt, imagePath, lastFrameImagePath } = namedInputs;
116
+ const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
93
117
  const aspectRatio = getAspectRatio(params.canvasSize);
94
118
  const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
95
119
  if (!provider2MovieAgent.replicate.modelParams[model]) {
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
110
134
  });
111
135
  }
112
136
  try {
113
- const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
137
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
114
138
  if (buffer) {
115
139
  return { buffer };
116
140
  }
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
80
80
  durations: number[];
81
81
  start_image: string | undefined;
82
82
  last_image?: string;
83
+ reference_images_param?: string;
83
84
  price_per_sec: number;
84
85
  }>;
85
86
  };
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
90
91
  keyName: string;
91
92
  modelParams: Record<string, {
92
93
  durations: number[];
94
+ supportsDuration: boolean;
93
95
  supportsLastFrame: boolean;
94
96
  supportsReferenceImages: boolean;
95
97
  supportsPersonGeneration: boolean;
@@ -92,6 +92,9 @@ export const provider2MovieAgent = {
92
92
  "kwaivgi/kling-v2.1-master",
93
93
  "google/veo-2",
94
94
  "google/veo-3",
95
+ "google/veo-3.1",
96
+ "google/veo-3.1-fast",
97
+ "google/veo-3.1-lite",
95
98
  "google/veo-3-fast",
96
99
  "minimax/video-01",
97
100
  "minimax/hailuo-02",
@@ -99,6 +102,11 @@ export const provider2MovieAgent = {
99
102
  "pixverse/pixverse-v4.5",
100
103
  "wan-video/wan-2.2-i2v-fast",
101
104
  "wan-video/wan-2.2-t2v-fast",
105
+ "xai/grok-imagine-video",
106
+ "xai/grok-imagine-r2v",
107
+ "runwayml/gen-4.5",
108
+ "kwaivgi/kling-v3-omni-video",
109
+ "kwaivgi/kling-v3-video",
102
110
  ],
103
111
  modelParams: {
104
112
  "bytedance/seedance-1-lite": {
@@ -138,6 +146,25 @@ export const provider2MovieAgent = {
138
146
  start_image: "image",
139
147
  price_per_sec: 0.75,
140
148
  },
149
+ "google/veo-3.1": {
150
+ durations: [4, 6, 8],
151
+ start_image: "image",
152
+ last_image: "last_frame_image",
153
+ reference_images_param: "reference_images",
154
+ price_per_sec: 0.75,
155
+ },
156
+ "google/veo-3.1-fast": {
157
+ durations: [4, 6, 8],
158
+ start_image: "image",
159
+ last_image: "last_frame_image",
160
+ price_per_sec: 0.4,
161
+ },
162
+ "google/veo-3.1-lite": {
163
+ durations: [4, 6, 8],
164
+ start_image: "image",
165
+ last_image: "last_frame",
166
+ price_per_sec: 0.05,
167
+ },
141
168
  "google/veo-3-fast": {
142
169
  durations: [8],
143
170
  start_image: "image",
@@ -175,28 +202,68 @@ export const provider2MovieAgent = {
175
202
  start_image: undefined,
176
203
  price_per_sec: 0.012,
177
204
  },
205
+ "xai/grok-imagine-video": {
206
+ durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
207
+ start_image: "image",
208
+ price_per_sec: 0.08,
209
+ },
210
+ "xai/grok-imagine-r2v": {
211
+ durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
212
+ start_image: undefined,
213
+ reference_images_param: "reference_images",
214
+ price_per_sec: 0.08,
215
+ },
216
+ "runwayml/gen-4.5": {
217
+ durations: [5, 10],
218
+ start_image: "image",
219
+ price_per_sec: 0.25,
220
+ },
221
+ "kwaivgi/kling-v3-omni-video": {
222
+ durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
223
+ start_image: "start_image",
224
+ last_image: "end_image",
225
+ reference_images_param: "reference_images",
226
+ price_per_sec: 0.3,
227
+ },
228
+ "kwaivgi/kling-v3-video": {
229
+ durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
230
+ start_image: "start_image",
231
+ last_image: "end_image",
232
+ reference_images_param: "reference_images",
233
+ price_per_sec: 0.3,
234
+ },
178
235
  },
179
236
  },
180
237
  google: {
181
238
  agentName: "movieGenAIAgent",
182
239
  defaultModel: "veo-2.0-generate-001",
183
- models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
240
+ models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-lite-generate-preview"],
184
241
  keyName: "GEMINI_API_KEY",
185
242
  modelParams: {
243
+ "veo-3.1-lite-generate-preview": {
244
+ durations: [4, 6, 8],
245
+ supportsDuration: true,
246
+ supportsLastFrame: true,
247
+ supportsReferenceImages: false,
248
+ supportsPersonGeneration: false,
249
+ },
186
250
  "veo-3.1-generate-preview": {
187
251
  durations: [4, 6, 8],
252
+ supportsDuration: true,
188
253
  supportsLastFrame: true,
189
254
  supportsReferenceImages: true,
190
255
  supportsPersonGeneration: false,
191
256
  },
192
257
  "veo-3.0-generate-001": {
193
- durations: [4, 6, 8],
258
+ durations: [8],
259
+ supportsDuration: false, // Veo 3.0 always generates 8s
194
260
  supportsLastFrame: false,
195
261
  supportsReferenceImages: false,
196
262
  supportsPersonGeneration: false,
197
263
  },
198
264
  "veo-2.0-generate-001": {
199
- durations: [5, 6, 7, 8],
265
+ durations: [5, 6, 8],
266
+ supportsDuration: true,
200
267
  supportsLastFrame: false, // Vertex AI only
201
268
  supportsReferenceImages: false,
202
269
  supportsPersonGeneration: true,
@@ -229,7 +296,7 @@ export const provider2LipSyncAgent = {
229
296
  agentName: "lipSyncReplicateAgent",
230
297
  defaultModel: "bytedance/omni-human",
231
298
  keyName: "REPLICATE_API_TOKEN",
232
- models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
299
+ models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human", "pixverse/lipsync"],
233
300
  modelParams: {
234
301
  "bytedance/latentsync": {
235
302
  identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
@@ -247,14 +314,20 @@ export const provider2LipSyncAgent = {
247
314
  audio: "audio",
248
315
  price_per_sec: 0.14,
249
316
  },
317
+ "pixverse/lipsync": {
318
+ identifier: "pixverse/lipsync:3ca6d73f4fb9e1d77a4b6e14f8998ee18926e4dc462838e31fa2bb5e662c1e2c",
319
+ video: "video",
320
+ audio: "audio",
321
+ },
250
322
  /* NOTE: This model does not work with large base64 urls.
251
323
  "sync/lipsync-2": {
252
324
  video: "video",
253
325
  audio: "audio",
254
326
  },
255
327
  */
256
- /* NOTE: This model does not work well for some unknown reason.
328
+ /* NOTE: This model does not work with base64 data URIs (error 1201).
257
329
  "kwaivgi/kling-lip-sync": {
330
+ identifier: "kwaivgi/kling-lip-sync:8311467f07043d4b3feb44584d2586bfa2fc70203eca612ed26f84d0b55df3ce",
258
331
  video: "video_url",
259
332
  audio: "audio_file",
260
333
  },