mulmocast 2.6.4 → 2.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/graph_option.d.ts +3 -0
- package/lib/actions/graph_option.js +18 -0
- package/lib/actions/image_references.d.ts +1 -0
- package/lib/actions/image_references.js +50 -12
- package/lib/actions/images.d.ts +2 -3
- package/lib/actions/images.js +4 -17
- package/lib/actions/movie.d.ts +3 -0
- package/lib/actions/movie.js +38 -5
- package/lib/agents/add_bgm_agent.d.ts +10 -0
- package/lib/agents/add_bgm_agent.js +26 -4
- package/lib/agents/movie_genai_agent.js +1 -1
- package/lib/agents/movie_replicate_agent.js +29 -5
- package/lib/types/provider2agent.d.ts +2 -0
- package/lib/types/provider2agent.js +78 -5
- package/lib/types/schema.d.ts +166 -4
- package/lib/types/schema.js +11 -1
- package/lib/utils/context.d.ts +72 -2
- package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
- package/lib/utils/image_plugins/html_tailwind.js +56 -5
- package/package.json +13 -12
- package/scripts/test/fixtures/movie_tone_high.mov +0 -0
- package/scripts/test/fixtures/movie_tone_low.mov +0 -0
- package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
- package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
- package/scripts/test/test_audio_mix.json +91 -0
- package/scripts/test/test_audio_mix_beat_vol.json +100 -0
- package/scripts/test/test_audio_mix_ducking.json +91 -0
- package/scripts/test/test_audio_mix_legacy.json +90 -0
- package/scripts/test/test_grok.json +57 -0
- package/scripts/test/test_image_prompt_reference.json +55 -0
- package/scripts/test/test_image_references.json +74 -0
- package/scripts/test/test_kling_v3.json +54 -0
- package/scripts/test/test_kling_v3_omni.json +54 -0
- package/scripts/test/test_lipsync2.json +48 -52
- package/scripts/test/test_lipsync5.json +66 -0
- package/scripts/test/test_runway.json +54 -0
- package/scripts/test/test_threejs.json +241 -0
- package/scripts/test/test_threejs_glb.json +154 -0
- package/scripts/test/test_veo31_lite.json +39 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { TaskManager } from "graphai";
|
|
2
|
+
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
3
|
+
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
4
|
+
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
5
|
+
export const graphOption = async (context, settings) => {
|
|
6
|
+
const options = {
|
|
7
|
+
agentFilters: [
|
|
8
|
+
{
|
|
9
|
+
name: "fileCacheAgentFilter",
|
|
10
|
+
agent: fileCacheAgentFilter,
|
|
11
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
|
|
12
|
+
},
|
|
13
|
+
],
|
|
14
|
+
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
15
|
+
config: settings2GraphAIConfig(settings, process.env),
|
|
16
|
+
};
|
|
17
|
+
return options;
|
|
18
|
+
};
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
2
|
import { getReferenceImagePath } from "../utils/file.js";
|
|
3
|
-
import { graphOption } from "./
|
|
3
|
+
import { graphOption } from "./graph_option.js";
|
|
4
4
|
import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
|
|
5
5
|
import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
|
|
6
6
|
import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
|
|
7
7
|
// public api
|
|
8
8
|
// Application may call this function directly to generate reference image.
|
|
9
9
|
export const generateReferenceImage = async (inputs) => {
|
|
10
|
-
const { context, key, index, image, force } = inputs;
|
|
10
|
+
const { context, key, index, image, referenceImagePath, force } = inputs;
|
|
11
11
|
const imagePath = getReferenceImagePath(context, key, "png");
|
|
12
12
|
// generate image
|
|
13
13
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
14
14
|
const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
|
|
15
15
|
GraphAILogger.info(`Generating reference image for ${key}: ${prompt}`);
|
|
16
|
+
const referenceImages = referenceImagePath ? [referenceImagePath] : undefined;
|
|
16
17
|
const image_graph_data = {
|
|
17
18
|
version: 0.5,
|
|
18
19
|
nodes: {
|
|
@@ -22,6 +23,7 @@ export const generateReferenceImage = async (inputs) => {
|
|
|
22
23
|
inputs: {
|
|
23
24
|
media: "image",
|
|
24
25
|
prompt,
|
|
26
|
+
referenceImages,
|
|
25
27
|
cache: {
|
|
26
28
|
force: [context.force, force ?? false],
|
|
27
29
|
file: imagePath,
|
|
@@ -58,12 +60,14 @@ export const getMediaRefs = async (context) => {
|
|
|
58
60
|
}
|
|
59
61
|
const imageRefs = {};
|
|
60
62
|
const movieRefs = {};
|
|
63
|
+
// Stage 1: resolve non-referencing entries (image, imagePrompt without referenceImageName, movie)
|
|
61
64
|
await Promise.all(Object.keys(images)
|
|
62
65
|
.sort()
|
|
63
66
|
.map(async (key, index) => {
|
|
64
67
|
const image = images[key];
|
|
65
|
-
if (image.type === "imagePrompt") {
|
|
66
|
-
|
|
68
|
+
if (image.type === "imagePrompt" && !image.referenceImageName) {
|
|
69
|
+
const refPath = image.referenceImage ? await MulmoMediaSourceMethods.imageReference(image.referenceImage, context, key) : undefined;
|
|
70
|
+
imageRefs[key] = await generateReferenceImage({ context, key, index, image, referenceImagePath: refPath, force: false });
|
|
67
71
|
}
|
|
68
72
|
else if (image.type === "image") {
|
|
69
73
|
imageRefs[key] = await MulmoMediaSourceMethods.imageReference(image.source, context, key);
|
|
@@ -72,10 +76,23 @@ export const getMediaRefs = async (context) => {
|
|
|
72
76
|
movieRefs[key] = await resolveMovieReference(image, context, key);
|
|
73
77
|
}
|
|
74
78
|
}));
|
|
79
|
+
// Stage 2: resolve imagePrompt with referenceImageName (depends on Stage 1 results)
|
|
80
|
+
await Promise.all(Object.keys(images)
|
|
81
|
+
.sort()
|
|
82
|
+
.map(async (key, index) => {
|
|
83
|
+
const image = images[key];
|
|
84
|
+
if (image.type === "imagePrompt" && image.referenceImageName) {
|
|
85
|
+
const refPath = imageRefs[image.referenceImageName];
|
|
86
|
+
if (!refPath) {
|
|
87
|
+
GraphAILogger.warn(`imagePrompt "${key}": referenceImageName "${image.referenceImageName}" not found in imageRefs — generating without reference`);
|
|
88
|
+
}
|
|
89
|
+
imageRefs[key] = await generateReferenceImage({ context, key, index, image, referenceImagePath: refPath, force: false });
|
|
90
|
+
}
|
|
91
|
+
}));
|
|
75
92
|
return { imageRefs, movieRefs };
|
|
76
93
|
};
|
|
77
|
-
const resolveMovieReference = async (
|
|
78
|
-
return MulmoMediaSourceMethods.imageReference(
|
|
94
|
+
const resolveMovieReference = async (media, context, key) => {
|
|
95
|
+
return MulmoMediaSourceMethods.imageReference(media.source, context, key);
|
|
79
96
|
};
|
|
80
97
|
const generateReferenceMovie = async (inputs) => {
|
|
81
98
|
const { context, key, index, moviePrompt, imagePath } = inputs;
|
|
@@ -124,17 +141,19 @@ const generateReferenceMovie = async (inputs) => {
|
|
|
124
141
|
const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) => {
|
|
125
142
|
const localImageRefs = {};
|
|
126
143
|
const localMovieRefs = {};
|
|
127
|
-
// Stage 1: image, imagePrompt, movie (parallel)
|
|
144
|
+
// Stage 1: image, imagePrompt (without referenceImageName), movie (parallel)
|
|
128
145
|
await Promise.all(Object.keys(images)
|
|
129
146
|
.sort()
|
|
130
147
|
.map(async (key, i) => {
|
|
131
148
|
const entry = images[key];
|
|
132
|
-
if (entry.type === "imagePrompt") {
|
|
149
|
+
if (entry.type === "imagePrompt" && !entry.referenceImageName) {
|
|
150
|
+
const refPath = entry.referenceImage ? await MulmoMediaSourceMethods.imageReference(entry.referenceImage, context, key) : undefined;
|
|
133
151
|
localImageRefs[key] = await generateReferenceImage({
|
|
134
152
|
context,
|
|
135
153
|
key,
|
|
136
154
|
index: beatIndex * 100 + i,
|
|
137
155
|
image: entry,
|
|
156
|
+
referenceImagePath: refPath,
|
|
138
157
|
});
|
|
139
158
|
}
|
|
140
159
|
else if (entry.type === "image") {
|
|
@@ -144,20 +163,39 @@ const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) =>
|
|
|
144
163
|
localMovieRefs[key] = await resolveMovieReference(entry, context, key);
|
|
145
164
|
}
|
|
146
165
|
}));
|
|
147
|
-
// Stage 2:
|
|
166
|
+
// Stage 2: imagePrompt with referenceImageName (depends on Stage 1)
|
|
167
|
+
const combinedImageRefsForImagePrompt = { ...globalImageRefs, ...localImageRefs };
|
|
168
|
+
await Promise.all(Object.keys(images)
|
|
169
|
+
.sort()
|
|
170
|
+
.map(async (key, i) => {
|
|
171
|
+
const entry = images[key];
|
|
172
|
+
if (entry.type === "imagePrompt" && entry.referenceImageName) {
|
|
173
|
+
const refPath = combinedImageRefsForImagePrompt[entry.referenceImageName];
|
|
174
|
+
if (!refPath) {
|
|
175
|
+
GraphAILogger.warn(`imagePrompt "${key}": referenceImageName "${entry.referenceImageName}" not found — generating without reference`);
|
|
176
|
+
}
|
|
177
|
+
localImageRefs[key] = await generateReferenceImage({
|
|
178
|
+
context,
|
|
179
|
+
key,
|
|
180
|
+
index: beatIndex * 100 + i,
|
|
181
|
+
image: entry,
|
|
182
|
+
referenceImagePath: refPath,
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
}));
|
|
186
|
+
// Stage 3: moviePrompt (imageName references imageRefs only)
|
|
148
187
|
const combinedImageRefs = { ...globalImageRefs, ...localImageRefs };
|
|
149
188
|
await Promise.all(Object.keys(images)
|
|
150
189
|
.sort()
|
|
151
190
|
.map(async (key, i) => {
|
|
152
191
|
const entry = images[key];
|
|
153
192
|
if (entry.type === "moviePrompt") {
|
|
154
|
-
const
|
|
155
|
-
const refImagePath = mp.imageName ? combinedImageRefs[mp.imageName] : undefined;
|
|
193
|
+
const refImagePath = entry.imageName ? combinedImageRefs[entry.imageName] : undefined;
|
|
156
194
|
localMovieRefs[key] = await generateReferenceMovie({
|
|
157
195
|
context,
|
|
158
196
|
key,
|
|
159
197
|
index: beatIndex * 100 + i,
|
|
160
|
-
moviePrompt:
|
|
198
|
+
moviePrompt: entry,
|
|
161
199
|
imagePath: refImagePath,
|
|
162
200
|
});
|
|
163
201
|
}
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { GraphData } from "graphai";
|
|
2
2
|
import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
|
|
3
3
|
export declare const beat_graph_data: {
|
|
4
4
|
version: number;
|
|
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
|
|
|
463
463
|
};
|
|
464
464
|
};
|
|
465
465
|
export declare const images_graph_data: GraphData;
|
|
466
|
-
export
|
|
466
|
+
export { graphOption } from "./graph_option.js";
|
|
467
467
|
type ImageOptions = {
|
|
468
468
|
imageAgents: Record<string, unknown>;
|
|
469
469
|
};
|
|
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
|
|
|
481
481
|
withBackup?: boolean;
|
|
482
482
|
};
|
|
483
483
|
}) => Promise<void>;
|
|
484
|
-
export {};
|
package/lib/actions/images.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
|
-
import { GraphAI, GraphAILogger
|
|
3
|
+
import { GraphAI, GraphAILogger } from "graphai";
|
|
4
4
|
import { AuthenticationError, RateLimitError } from "openai/index.js";
|
|
5
5
|
import * as vanilla from "@graphai/vanilla";
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
|
|
|
10
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
11
|
import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
|
|
12
12
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
13
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
14
|
-
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
15
13
|
import { audioCheckerError } from "../utils/error_cause.js";
|
|
16
14
|
import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
|
|
17
15
|
import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
|
|
18
16
|
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
+
import { graphOption } from "./graph_option.js";
|
|
19
18
|
const vanillaAgents = vanilla.default ?? vanilla;
|
|
20
19
|
const imageAgents = {
|
|
21
20
|
imageGenAIAgent,
|
|
@@ -432,20 +431,8 @@ export const images_graph_data = {
|
|
|
432
431
|
},
|
|
433
432
|
},
|
|
434
433
|
};
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
agentFilters: [
|
|
438
|
-
{
|
|
439
|
-
name: "fileCacheAgentFilter",
|
|
440
|
-
agent: fileCacheAgentFilter,
|
|
441
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
|
|
442
|
-
},
|
|
443
|
-
],
|
|
444
|
-
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
445
|
-
config: settings2GraphAIConfig(settings, process.env),
|
|
446
|
-
};
|
|
447
|
-
return options;
|
|
448
|
-
};
|
|
434
|
+
// graphOption moved to graph_option.ts to break circular dependency with image_references.ts
|
|
435
|
+
export { graphOption } from "./graph_option.js";
|
|
449
436
|
const prepareGenerateImages = async (context) => {
|
|
450
437
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
451
438
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
|
|
|
13
13
|
export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
|
|
14
14
|
export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
|
|
15
15
|
export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
|
|
16
|
+
export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
|
|
17
|
+
export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
|
|
18
|
+
export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
|
|
16
19
|
export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
|
|
17
20
|
export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
|
|
18
21
|
style: "aspectFit" | "aspectFill";
|
package/lib/actions/movie.js
CHANGED
|
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
|
|
|
9
9
|
// const isMac = process.platform === "darwin";
|
|
10
10
|
const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
|
|
11
11
|
const VIDEO_FPS = 30;
|
|
12
|
+
const DEFAULT_DUCKING_RATIO = 0.3;
|
|
12
13
|
export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
|
|
13
14
|
const videoId = `v${inputIndex}`;
|
|
14
15
|
const videoFilters = [];
|
|
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
|
|
|
241
242
|
return nextTransition !== null; // Any transition on next beat requires this beat's last frame
|
|
242
243
|
});
|
|
243
244
|
};
|
|
244
|
-
const
|
|
245
|
+
export const resolveMovieVolume = (beat, context) => {
|
|
246
|
+
const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
|
|
247
|
+
const ducking = context.presentationStyle.audioParams.ducking;
|
|
248
|
+
const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
|
|
249
|
+
if (ducking && hasSpeech) {
|
|
250
|
+
const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
|
|
251
|
+
return baseMovieVolume * ratio;
|
|
252
|
+
}
|
|
253
|
+
return baseMovieVolume;
|
|
254
|
+
};
|
|
255
|
+
export const isExplicitMixMode = (context) => {
|
|
256
|
+
const audioParams = context.presentationStyle.audioParams;
|
|
257
|
+
const duckingRequested = audioParams.ducking !== undefined;
|
|
258
|
+
const speechSuppressed = audioParams.suppressSpeech === true;
|
|
259
|
+
const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
|
|
260
|
+
const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
|
|
261
|
+
return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
|
|
262
|
+
};
|
|
263
|
+
export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
|
|
245
264
|
if (audioIdsFromMovieBeats.length > 0) {
|
|
246
265
|
const mainAudioId = "mainaudio";
|
|
247
266
|
const compositeAudioId = "composite";
|
|
248
267
|
const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
|
|
249
|
-
|
|
250
|
-
|
|
268
|
+
const useExplicitMix = isExplicitMixMode(context);
|
|
269
|
+
if (useExplicitMix) {
|
|
270
|
+
// Explicit mode: normalize=0 + limiter.
|
|
271
|
+
// ttsVolume is applied in addBGMAgent to avoid changing BGM level.
|
|
272
|
+
// Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
|
|
273
|
+
const mixedId = "mixed";
|
|
274
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
275
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
|
|
276
|
+
// Limiter as failsafe
|
|
277
|
+
ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
// Legacy mode: normalize=1 (current behavior, fully backward compatible)
|
|
281
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
282
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
|
|
283
|
+
}
|
|
251
284
|
return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
|
|
252
285
|
}
|
|
253
286
|
return artifactAudioId;
|
|
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
|
|
|
420
453
|
transitionVideoIds.push(transitionVideoId);
|
|
421
454
|
}
|
|
422
455
|
// NOTE: We don't support audio if the speed is not 1.0.
|
|
423
|
-
const movieVolume = beat
|
|
456
|
+
const movieVolume = resolveMovieVolume(beat, context);
|
|
424
457
|
if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
|
|
425
458
|
// TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
|
|
426
459
|
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
|
|
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
|
|
|
442
475
|
}
|
|
443
476
|
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
|
444
477
|
const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
|
|
445
|
-
const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
|
|
478
|
+
const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
|
|
446
479
|
await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
|
|
447
480
|
const endTime = performance.now();
|
|
448
481
|
GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
|
|
@@ -1,3 +1,13 @@
|
|
|
1
1
|
import type { AgentFunctionInfo } from "graphai";
|
|
2
|
+
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
|
|
4
|
+
useExplicitMix: boolean;
|
|
5
|
+
voiceVolume: number;
|
|
6
|
+
};
|
|
7
|
+
export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
|
|
8
|
+
amixNormalize: string;
|
|
9
|
+
mixedOutputId: string;
|
|
10
|
+
limiterFilter: string | undefined;
|
|
11
|
+
};
|
|
2
12
|
declare const addBGMAgentInfo: AgentFunctionInfo;
|
|
3
13
|
export default addBGMAgentInfo;
|
|
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
|
|
|
3
3
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
4
4
|
import { isFile } from "../utils/file.js";
|
|
5
5
|
import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
|
|
6
|
+
export const resolveAddBgmMixParams = (audioParams) => {
|
|
7
|
+
const useExplicitMix = audioParams.ttsVolume !== undefined;
|
|
8
|
+
const ttsVolume = audioParams.ttsVolume ?? 1.0;
|
|
9
|
+
return {
|
|
10
|
+
useExplicitMix,
|
|
11
|
+
voiceVolume: audioParams.audioVolume * ttsVolume,
|
|
12
|
+
};
|
|
13
|
+
};
|
|
14
|
+
export const resolveAddBgmFilterConfig = (useExplicitMix) => {
|
|
15
|
+
const amixNormalize = useExplicitMix ? ":normalize=0" : "";
|
|
16
|
+
return {
|
|
17
|
+
amixNormalize,
|
|
18
|
+
mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
|
|
19
|
+
limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
|
|
20
|
+
};
|
|
21
|
+
};
|
|
6
22
|
const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
7
23
|
const { voiceFile, outputFile, context } = namedInputs;
|
|
8
24
|
const { musicFile } = params;
|
|
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
|
24
40
|
const ffmpegContext = FfmpegContextInit();
|
|
25
41
|
const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
|
|
26
42
|
const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
ffmpegContext.filterComplex.push(`[
|
|
30
|
-
ffmpegContext.filterComplex.push(`[
|
|
43
|
+
const audioParams = context.presentationStyle.audioParams;
|
|
44
|
+
const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
|
|
45
|
+
ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
|
|
46
|
+
ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
|
|
47
|
+
const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
|
|
48
|
+
ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
|
|
49
|
+
if (limiterFilter) {
|
|
50
|
+
ffmpegContext.filterComplex.push(limiterFilter);
|
|
51
|
+
}
|
|
52
|
+
ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
|
|
31
53
|
ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
|
|
32
54
|
try {
|
|
33
55
|
await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
|
|
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
|
|
|
100
100
|
model,
|
|
101
101
|
prompt,
|
|
102
102
|
config: {
|
|
103
|
-
durationSeconds: capabilities?.
|
|
103
|
+
durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
|
|
104
104
|
aspectRatio,
|
|
105
105
|
personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
|
|
106
106
|
},
|
|
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
|
|
|
3
3
|
import Replicate from "replicate";
|
|
4
4
|
import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
|
|
5
5
|
import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
|
|
6
|
-
|
|
6
|
+
function replicate_get_videoUrl(output) {
|
|
7
|
+
if (typeof output === "string")
|
|
8
|
+
return output;
|
|
9
|
+
if (output && typeof output === "object" && "url" in output)
|
|
10
|
+
return output.url();
|
|
11
|
+
return undefined;
|
|
12
|
+
}
|
|
13
|
+
async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
|
|
7
14
|
const replicate = new Replicate({
|
|
8
15
|
auth: apiKey,
|
|
9
16
|
});
|
|
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
|
|
|
37
44
|
input.image = base64Image;
|
|
38
45
|
}
|
|
39
46
|
}
|
|
47
|
+
// Add reference images if provided and model supports it
|
|
48
|
+
const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
|
|
49
|
+
if (referenceImages && referenceImages.length > 0) {
|
|
50
|
+
if (!referenceImagesParam) {
|
|
51
|
+
GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
|
|
52
|
+
}
|
|
53
|
+
else if (imagePath) {
|
|
54
|
+
GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
input[referenceImagesParam] = referenceImages.map((ref) => {
|
|
58
|
+
const buffer = readFileSync(ref.imagePath);
|
|
59
|
+
return `data:image/png;base64,${buffer.toString("base64")}`;
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
}
|
|
40
63
|
// Add last frame image if provided and model supports it
|
|
41
64
|
if (lastFrameImagePath) {
|
|
42
65
|
const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
|
|
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
|
|
|
57
80
|
try {
|
|
58
81
|
const output = await replicate.run(model, { input });
|
|
59
82
|
// Download the generated video
|
|
60
|
-
|
|
61
|
-
|
|
83
|
+
// Some models return a FileOutput object with a url() method; others return a plain string URL.
|
|
84
|
+
const videoUrl = replicate_get_videoUrl(output);
|
|
85
|
+
if (videoUrl) {
|
|
62
86
|
const videoResponse = await fetch(videoUrl);
|
|
63
87
|
if (!videoResponse.ok) {
|
|
64
88
|
throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
|
|
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
|
|
|
89
113
|
return "9:16";
|
|
90
114
|
};
|
|
91
115
|
export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
92
|
-
const { prompt, imagePath, lastFrameImagePath } = namedInputs;
|
|
116
|
+
const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
|
|
93
117
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
94
118
|
const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
|
|
95
119
|
if (!provider2MovieAgent.replicate.modelParams[model]) {
|
|
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
|
110
134
|
});
|
|
111
135
|
}
|
|
112
136
|
try {
|
|
113
|
-
const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
|
|
137
|
+
const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
|
|
114
138
|
if (buffer) {
|
|
115
139
|
return { buffer };
|
|
116
140
|
}
|
|
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
|
|
|
80
80
|
durations: number[];
|
|
81
81
|
start_image: string | undefined;
|
|
82
82
|
last_image?: string;
|
|
83
|
+
reference_images_param?: string;
|
|
83
84
|
price_per_sec: number;
|
|
84
85
|
}>;
|
|
85
86
|
};
|
|
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
|
|
|
90
91
|
keyName: string;
|
|
91
92
|
modelParams: Record<string, {
|
|
92
93
|
durations: number[];
|
|
94
|
+
supportsDuration: boolean;
|
|
93
95
|
supportsLastFrame: boolean;
|
|
94
96
|
supportsReferenceImages: boolean;
|
|
95
97
|
supportsPersonGeneration: boolean;
|
|
@@ -92,6 +92,9 @@ export const provider2MovieAgent = {
|
|
|
92
92
|
"kwaivgi/kling-v2.1-master",
|
|
93
93
|
"google/veo-2",
|
|
94
94
|
"google/veo-3",
|
|
95
|
+
"google/veo-3.1",
|
|
96
|
+
"google/veo-3.1-fast",
|
|
97
|
+
"google/veo-3.1-lite",
|
|
95
98
|
"google/veo-3-fast",
|
|
96
99
|
"minimax/video-01",
|
|
97
100
|
"minimax/hailuo-02",
|
|
@@ -99,6 +102,11 @@ export const provider2MovieAgent = {
|
|
|
99
102
|
"pixverse/pixverse-v4.5",
|
|
100
103
|
"wan-video/wan-2.2-i2v-fast",
|
|
101
104
|
"wan-video/wan-2.2-t2v-fast",
|
|
105
|
+
"xai/grok-imagine-video",
|
|
106
|
+
"xai/grok-imagine-r2v",
|
|
107
|
+
"runwayml/gen-4.5",
|
|
108
|
+
"kwaivgi/kling-v3-omni-video",
|
|
109
|
+
"kwaivgi/kling-v3-video",
|
|
102
110
|
],
|
|
103
111
|
modelParams: {
|
|
104
112
|
"bytedance/seedance-1-lite": {
|
|
@@ -138,6 +146,25 @@ export const provider2MovieAgent = {
|
|
|
138
146
|
start_image: "image",
|
|
139
147
|
price_per_sec: 0.75,
|
|
140
148
|
},
|
|
149
|
+
"google/veo-3.1": {
|
|
150
|
+
durations: [4, 6, 8],
|
|
151
|
+
start_image: "image",
|
|
152
|
+
last_image: "last_frame_image",
|
|
153
|
+
reference_images_param: "reference_images",
|
|
154
|
+
price_per_sec: 0.75,
|
|
155
|
+
},
|
|
156
|
+
"google/veo-3.1-fast": {
|
|
157
|
+
durations: [4, 6, 8],
|
|
158
|
+
start_image: "image",
|
|
159
|
+
last_image: "last_frame_image",
|
|
160
|
+
price_per_sec: 0.4,
|
|
161
|
+
},
|
|
162
|
+
"google/veo-3.1-lite": {
|
|
163
|
+
durations: [4, 6, 8],
|
|
164
|
+
start_image: "image",
|
|
165
|
+
last_image: "last_frame",
|
|
166
|
+
price_per_sec: 0.05,
|
|
167
|
+
},
|
|
141
168
|
"google/veo-3-fast": {
|
|
142
169
|
durations: [8],
|
|
143
170
|
start_image: "image",
|
|
@@ -175,28 +202,68 @@ export const provider2MovieAgent = {
|
|
|
175
202
|
start_image: undefined,
|
|
176
203
|
price_per_sec: 0.012,
|
|
177
204
|
},
|
|
205
|
+
"xai/grok-imagine-video": {
|
|
206
|
+
durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
207
|
+
start_image: "image",
|
|
208
|
+
price_per_sec: 0.08,
|
|
209
|
+
},
|
|
210
|
+
"xai/grok-imagine-r2v": {
|
|
211
|
+
durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
|
212
|
+
start_image: undefined,
|
|
213
|
+
reference_images_param: "reference_images",
|
|
214
|
+
price_per_sec: 0.08,
|
|
215
|
+
},
|
|
216
|
+
"runwayml/gen-4.5": {
|
|
217
|
+
durations: [5, 10],
|
|
218
|
+
start_image: "image",
|
|
219
|
+
price_per_sec: 0.25,
|
|
220
|
+
},
|
|
221
|
+
"kwaivgi/kling-v3-omni-video": {
|
|
222
|
+
durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
223
|
+
start_image: "start_image",
|
|
224
|
+
last_image: "end_image",
|
|
225
|
+
reference_images_param: "reference_images",
|
|
226
|
+
price_per_sec: 0.3,
|
|
227
|
+
},
|
|
228
|
+
"kwaivgi/kling-v3-video": {
|
|
229
|
+
durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
230
|
+
start_image: "start_image",
|
|
231
|
+
last_image: "end_image",
|
|
232
|
+
reference_images_param: "reference_images",
|
|
233
|
+
price_per_sec: 0.3,
|
|
234
|
+
},
|
|
178
235
|
},
|
|
179
236
|
},
|
|
180
237
|
google: {
|
|
181
238
|
agentName: "movieGenAIAgent",
|
|
182
239
|
defaultModel: "veo-2.0-generate-001",
|
|
183
|
-
models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
|
|
240
|
+
models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-lite-generate-preview"],
|
|
184
241
|
keyName: "GEMINI_API_KEY",
|
|
185
242
|
modelParams: {
|
|
243
|
+
"veo-3.1-lite-generate-preview": {
|
|
244
|
+
durations: [4, 6, 8],
|
|
245
|
+
supportsDuration: true,
|
|
246
|
+
supportsLastFrame: true,
|
|
247
|
+
supportsReferenceImages: false,
|
|
248
|
+
supportsPersonGeneration: false,
|
|
249
|
+
},
|
|
186
250
|
"veo-3.1-generate-preview": {
|
|
187
251
|
durations: [4, 6, 8],
|
|
252
|
+
supportsDuration: true,
|
|
188
253
|
supportsLastFrame: true,
|
|
189
254
|
supportsReferenceImages: true,
|
|
190
255
|
supportsPersonGeneration: false,
|
|
191
256
|
},
|
|
192
257
|
"veo-3.0-generate-001": {
|
|
193
|
-
durations: [
|
|
258
|
+
durations: [8],
|
|
259
|
+
supportsDuration: false, // Veo 3.0 always generates 8s
|
|
194
260
|
supportsLastFrame: false,
|
|
195
261
|
supportsReferenceImages: false,
|
|
196
262
|
supportsPersonGeneration: false,
|
|
197
263
|
},
|
|
198
264
|
"veo-2.0-generate-001": {
|
|
199
|
-
durations: [5, 6,
|
|
265
|
+
durations: [5, 6, 8],
|
|
266
|
+
supportsDuration: true,
|
|
200
267
|
supportsLastFrame: false, // Vertex AI only
|
|
201
268
|
supportsReferenceImages: false,
|
|
202
269
|
supportsPersonGeneration: true,
|
|
@@ -229,7 +296,7 @@ export const provider2LipSyncAgent = {
|
|
|
229
296
|
agentName: "lipSyncReplicateAgent",
|
|
230
297
|
defaultModel: "bytedance/omni-human",
|
|
231
298
|
keyName: "REPLICATE_API_TOKEN",
|
|
232
|
-
models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
|
|
299
|
+
models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human", "pixverse/lipsync"],
|
|
233
300
|
modelParams: {
|
|
234
301
|
"bytedance/latentsync": {
|
|
235
302
|
identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
|
|
@@ -247,14 +314,20 @@ export const provider2LipSyncAgent = {
|
|
|
247
314
|
audio: "audio",
|
|
248
315
|
price_per_sec: 0.14,
|
|
249
316
|
},
|
|
317
|
+
"pixverse/lipsync": {
|
|
318
|
+
identifier: "pixverse/lipsync:3ca6d73f4fb9e1d77a4b6e14f8998ee18926e4dc462838e31fa2bb5e662c1e2c",
|
|
319
|
+
video: "video",
|
|
320
|
+
audio: "audio",
|
|
321
|
+
},
|
|
250
322
|
/* NOTE: This model does not work with large base64 urls.
|
|
251
323
|
"sync/lipsync-2": {
|
|
252
324
|
video: "video",
|
|
253
325
|
audio: "audio",
|
|
254
326
|
},
|
|
255
327
|
*/
|
|
256
|
-
/* NOTE: This model does not work
|
|
328
|
+
/* NOTE: This model does not work with base64 data URIs (error 1201).
|
|
257
329
|
"kwaivgi/kling-lip-sync": {
|
|
330
|
+
identifier: "kwaivgi/kling-lip-sync:8311467f07043d4b3feb44584d2586bfa2fc70203eca612ed26f84d0b55df3ce",
|
|
258
331
|
video: "video_url",
|
|
259
332
|
audio: "audio_file",
|
|
260
333
|
},
|