mulmocast 2.6.5 → 2.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +7 -35
- package/lib/actions/graph_option.d.ts +4 -0
- package/lib/actions/graph_option.js +19 -0
- package/lib/actions/image_references.js +3 -3
- package/lib/actions/images.d.ts +2 -3
- package/lib/actions/images.js +5 -19
- package/lib/actions/movie.d.ts +3 -0
- package/lib/actions/movie.js +38 -5
- package/lib/agents/add_bgm_agent.d.ts +10 -0
- package/lib/agents/add_bgm_agent.js +26 -4
- package/lib/agents/movie_genai_agent.js +1 -1
- package/lib/agents/movie_replicate_agent.js +29 -5
- package/lib/methods/mulmo_presentation_style.d.ts +5 -1
- package/lib/methods/mulmo_presentation_style.js +27 -3
- package/lib/types/provider2agent.d.ts +2 -0
- package/lib/types/provider2agent.js +78 -5
- package/lib/types/schema.d.ts +55 -4
- package/lib/types/schema.js +12 -1
- package/lib/utils/context.d.ts +28 -2
- package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
- package/lib/utils/image_plugins/html_tailwind.js +56 -5
- package/package.json +18 -16
- package/scripts/test/fixtures/movie_tone_high.mov +0 -0
- package/scripts/test/fixtures/movie_tone_low.mov +0 -0
- package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
- package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
- package/scripts/test/test_audio_mix.json +91 -0
- package/scripts/test/test_audio_mix_beat_vol.json +100 -0
- package/scripts/test/test_audio_mix_ducking.json +91 -0
- package/scripts/test/test_audio_mix_legacy.json +90 -0
- package/scripts/test/test_grok.json +57 -0
- package/scripts/test/test_image_references.json +74 -0
- package/scripts/test/test_kling_v3.json +54 -0
- package/scripts/test/test_kling_v3_omni.json +54 -0
- package/scripts/test/test_lipsync2.json +48 -52
- package/scripts/test/test_lipsync5.json +66 -0
- package/scripts/test/test_runway.json +54 -0
- package/scripts/test/test_threejs.json +241 -0
- package/scripts/test/test_threejs_glb.json +154 -0
- package/scripts/test/test_veo31_lite.json +39 -0
package/lib/actions/audio.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
|
-
import { GraphAI,
|
|
2
|
+
import { GraphAI, GraphAILogger } from "graphai";
|
|
3
3
|
import * as agents from "@graphai/vanilla";
|
|
4
4
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
5
5
|
import { ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, ttsKotodamaAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
|
|
6
|
-
import {
|
|
7
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
6
|
+
import { audioGraphOption } from "./graph_option.js";
|
|
8
7
|
import { getAudioArtifactFilePath, getAudioFilePath, getGroupedAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage, } from "../utils/file.js";
|
|
9
|
-
import { localizedText
|
|
8
|
+
import { localizedText } from "../utils/utils.js";
|
|
10
9
|
import { text2hash } from "../utils/utils_node.js";
|
|
11
10
|
import { provider2TTSAgent } from "../types/provider2agent.js";
|
|
12
11
|
import { invalidAudioSourceError } from "../utils/error_cause.js";
|
|
@@ -210,21 +209,6 @@ export const audio_graph_data = {
|
|
|
210
209
|
},
|
|
211
210
|
},
|
|
212
211
|
};
|
|
213
|
-
const agentFilters = [
|
|
214
|
-
{
|
|
215
|
-
name: "fileCacheAgentFilter",
|
|
216
|
-
agent: fileCacheAgentFilter,
|
|
217
|
-
nodeIds: ["tts"],
|
|
218
|
-
},
|
|
219
|
-
];
|
|
220
|
-
const getConcurrency = (context) => {
|
|
221
|
-
// Check if any speaker uses elevenlabs or kotodama (providers that require concurrency = 1)
|
|
222
|
-
const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
|
|
223
|
-
const provider = text2SpeechProviderSchema.parse(speaker.provider);
|
|
224
|
-
return provider2TTSAgent[provider].hasLimitedConcurrency;
|
|
225
|
-
});
|
|
226
|
-
return hasLimitedConcurrencyProvider ? 1 : 8;
|
|
227
|
-
};
|
|
228
212
|
const audioAgents = {
|
|
229
213
|
...vanillaAgents,
|
|
230
214
|
fileWriteAgent,
|
|
@@ -246,9 +230,8 @@ export const generateBeatAudio = async (index, context, args) => {
|
|
|
246
230
|
const audioSegmentDirPath = context.fileDirs.grouped ? audioDirPath : resolveDirPath(audioDirPath, fileName);
|
|
247
231
|
mkdir(outDirPath);
|
|
248
232
|
mkdir(audioSegmentDirPath);
|
|
249
|
-
const
|
|
250
|
-
|
|
251
|
-
const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, { agentFilters, taskManager, config });
|
|
233
|
+
const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, await audioGraphOption(context, settings));
|
|
234
|
+
callbacks?.forEach((callback) => graph.registerCallback(callback));
|
|
252
235
|
graph.injectValue("__mapIndex", index);
|
|
253
236
|
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
254
237
|
graph.injectValue("studioBeat", context.studio.beats[index]);
|
|
@@ -260,11 +243,6 @@ export const generateBeatAudio = async (index, context, args) => {
|
|
|
260
243
|
else {
|
|
261
244
|
graph.injectValue("lang", context.lang);
|
|
262
245
|
}
|
|
263
|
-
if (callbacks) {
|
|
264
|
-
callbacks.forEach((callback) => {
|
|
265
|
-
graph.registerCallback(callback);
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
246
|
await graph.run();
|
|
269
247
|
}
|
|
270
248
|
catch (error) {
|
|
@@ -288,19 +266,13 @@ export const audio = async (context, args) => {
|
|
|
288
266
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
|
|
289
267
|
mkdir(outDirPath);
|
|
290
268
|
mkdir(audioSegmentDirPath);
|
|
291
|
-
const
|
|
292
|
-
|
|
293
|
-
const graph = new GraphAI(audio_graph_data, audioAgents, { agentFilters, taskManager, config });
|
|
269
|
+
const graph = new GraphAI(audio_graph_data, audioAgents, await audioGraphOption(context, settings));
|
|
270
|
+
callbacks?.forEach((callback) => graph.registerCallback(callback));
|
|
294
271
|
graph.injectValue("context", context);
|
|
295
272
|
graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
|
|
296
273
|
graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
|
|
297
274
|
graph.injectValue("outputStudioFilePath", outputStudioFilePath);
|
|
298
275
|
graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
|
|
299
|
-
if (callbacks) {
|
|
300
|
-
callbacks.forEach((callback) => {
|
|
301
|
-
graph.registerCallback(callback);
|
|
302
|
-
});
|
|
303
|
-
}
|
|
304
276
|
const result = await graph.run();
|
|
305
277
|
writingMessage(audioCombinedFilePath);
|
|
306
278
|
MulmoStudioContextMethods.setSessionState(context, "audio", false, true);
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { GraphOptions } from "graphai";
|
|
2
|
+
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
export declare const imageGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
|
|
4
|
+
export declare const audioGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { TaskManager } from "graphai";
|
|
2
|
+
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
3
|
+
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
4
|
+
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
5
|
+
const createGraphOption = (concurrency, cacheNodeIds, settings) => ({
|
|
6
|
+
agentFilters: [
|
|
7
|
+
{
|
|
8
|
+
name: "fileCacheAgentFilter",
|
|
9
|
+
agent: fileCacheAgentFilter,
|
|
10
|
+
nodeIds: cacheNodeIds,
|
|
11
|
+
},
|
|
12
|
+
],
|
|
13
|
+
taskManager: new TaskManager(concurrency),
|
|
14
|
+
config: settings2GraphAIConfig(settings, process.env),
|
|
15
|
+
});
|
|
16
|
+
const IMAGE_CACHE_NODE_IDS = ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"];
|
|
17
|
+
const AUDIO_CACHE_NODE_IDS = ["tts"];
|
|
18
|
+
export const imageGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getImageConcurrency(context.presentationStyle), IMAGE_CACHE_NODE_IDS, settings);
|
|
19
|
+
export const audioGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getAudioConcurrency(context.presentationStyle), AUDIO_CACHE_NODE_IDS, settings);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
2
|
import { getReferenceImagePath } from "../utils/file.js";
|
|
3
|
-
import {
|
|
3
|
+
import { imageGraphOption } from "./graph_option.js";
|
|
4
4
|
import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
|
|
5
5
|
import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
|
|
6
6
|
import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
|
|
@@ -41,7 +41,7 @@ export const generateReferenceImage = async (inputs) => {
|
|
|
41
41
|
},
|
|
42
42
|
};
|
|
43
43
|
try {
|
|
44
|
-
const options = await
|
|
44
|
+
const options = await imageGraphOption(context);
|
|
45
45
|
const graph = new GraphAI(image_graph_data, { imageGenAIAgent, imageOpenaiAgent, mediaMockAgent, imageReplicateAgent }, options);
|
|
46
46
|
await graph.run();
|
|
47
47
|
return imagePath;
|
|
@@ -126,7 +126,7 @@ const generateReferenceMovie = async (inputs) => {
|
|
|
126
126
|
},
|
|
127
127
|
};
|
|
128
128
|
try {
|
|
129
|
-
const options = await
|
|
129
|
+
const options = await imageGraphOption(context);
|
|
130
130
|
const graph = new GraphAI(movie_graph_data, { movieGenAIAgent, movieReplicateAgent, mediaMockAgent }, options);
|
|
131
131
|
await graph.run();
|
|
132
132
|
return moviePath;
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { GraphData } from "graphai";
|
|
2
2
|
import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
|
|
3
3
|
export declare const beat_graph_data: {
|
|
4
4
|
version: number;
|
|
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
|
|
|
463
463
|
};
|
|
464
464
|
};
|
|
465
465
|
export declare const images_graph_data: GraphData;
|
|
466
|
-
export
|
|
466
|
+
export { imageGraphOption } from "./graph_option.js";
|
|
467
467
|
type ImageOptions = {
|
|
468
468
|
imageAgents: Record<string, unknown>;
|
|
469
469
|
};
|
|
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
|
|
|
481
481
|
withBackup?: boolean;
|
|
482
482
|
};
|
|
483
483
|
}) => Promise<void>;
|
|
484
|
-
export {};
|
package/lib/actions/images.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
|
-
import { GraphAI, GraphAILogger
|
|
3
|
+
import { GraphAI, GraphAILogger } from "graphai";
|
|
4
4
|
import { AuthenticationError, RateLimitError } from "openai/index.js";
|
|
5
5
|
import * as vanilla from "@graphai/vanilla";
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
|
|
|
10
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
11
|
import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
|
|
12
12
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
13
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
14
|
-
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
15
13
|
import { audioCheckerError } from "../utils/error_cause.js";
|
|
16
14
|
import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
|
|
17
15
|
import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
|
|
18
16
|
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
+
import { imageGraphOption } from "./graph_option.js";
|
|
19
18
|
const vanillaAgents = vanilla.default ?? vanilla;
|
|
20
19
|
const imageAgents = {
|
|
21
20
|
imageGenAIAgent,
|
|
@@ -432,20 +431,7 @@ export const images_graph_data = {
|
|
|
432
431
|
},
|
|
433
432
|
},
|
|
434
433
|
};
|
|
435
|
-
export
|
|
436
|
-
const options = {
|
|
437
|
-
agentFilters: [
|
|
438
|
-
{
|
|
439
|
-
name: "fileCacheAgentFilter",
|
|
440
|
-
agent: fileCacheAgentFilter,
|
|
441
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
|
|
442
|
-
},
|
|
443
|
-
],
|
|
444
|
-
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
445
|
-
config: settings2GraphAIConfig(settings, process.env),
|
|
446
|
-
};
|
|
447
|
-
return options;
|
|
448
|
-
};
|
|
434
|
+
export { imageGraphOption } from "./graph_option.js";
|
|
449
435
|
const prepareGenerateImages = async (context) => {
|
|
450
436
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
451
437
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
@@ -472,7 +458,7 @@ const generateImages = async (context, args) => {
|
|
|
472
458
|
...defaultAgents,
|
|
473
459
|
...optionImageAgents,
|
|
474
460
|
};
|
|
475
|
-
const graph = new GraphAI(images_graph_data, graphaiAgent, await
|
|
461
|
+
const graph = new GraphAI(images_graph_data, graphaiAgent, await imageGraphOption(context, settings));
|
|
476
462
|
Object.keys(injections).forEach((key) => {
|
|
477
463
|
graph.injectValue(key, injections[key]);
|
|
478
464
|
});
|
|
@@ -512,7 +498,7 @@ export const generateBeatImage = async (inputs) => {
|
|
|
512
498
|
try {
|
|
513
499
|
const { index, context, args } = inputs;
|
|
514
500
|
const { settings, callbacks, forceMovie, forceImage, forceLipSync, forceSoundEffect, withBackup } = args ?? {};
|
|
515
|
-
const options = await
|
|
501
|
+
const options = await imageGraphOption(context, settings);
|
|
516
502
|
const injections = await prepareGenerateImages(context);
|
|
517
503
|
const graph = new GraphAI(beat_graph_data, defaultAgents, options);
|
|
518
504
|
Object.keys(injections).forEach((key) => {
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
|
|
|
13
13
|
export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
|
|
14
14
|
export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
|
|
15
15
|
export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
|
|
16
|
+
export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
|
|
17
|
+
export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
|
|
18
|
+
export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
|
|
16
19
|
export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
|
|
17
20
|
export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
|
|
18
21
|
style: "aspectFit" | "aspectFill";
|
package/lib/actions/movie.js
CHANGED
|
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
|
|
|
9
9
|
// const isMac = process.platform === "darwin";
|
|
10
10
|
const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
|
|
11
11
|
const VIDEO_FPS = 30;
|
|
12
|
+
const DEFAULT_DUCKING_RATIO = 0.3;
|
|
12
13
|
export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
|
|
13
14
|
const videoId = `v${inputIndex}`;
|
|
14
15
|
const videoFilters = [];
|
|
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
|
|
|
241
242
|
return nextTransition !== null; // Any transition on next beat requires this beat's last frame
|
|
242
243
|
});
|
|
243
244
|
};
|
|
244
|
-
const
|
|
245
|
+
export const resolveMovieVolume = (beat, context) => {
|
|
246
|
+
const baseMovieVolume = beat.audioParams?.movieVolume ?? context.presentationStyle.audioParams.movieVolume ?? 1.0;
|
|
247
|
+
const ducking = context.presentationStyle.audioParams.ducking;
|
|
248
|
+
const hasSpeech = !!beat.text && !context.presentationStyle.audioParams.suppressSpeech;
|
|
249
|
+
if (ducking && hasSpeech) {
|
|
250
|
+
const ratio = ducking.ratio ?? DEFAULT_DUCKING_RATIO;
|
|
251
|
+
return baseMovieVolume * ratio;
|
|
252
|
+
}
|
|
253
|
+
return baseMovieVolume;
|
|
254
|
+
};
|
|
255
|
+
export const isExplicitMixMode = (context) => {
|
|
256
|
+
const audioParams = context.presentationStyle.audioParams;
|
|
257
|
+
const duckingRequested = audioParams.ducking !== undefined;
|
|
258
|
+
const speechSuppressed = audioParams.suppressSpeech === true;
|
|
259
|
+
const duckingAffectsMixMode = duckingRequested && !speechSuppressed;
|
|
260
|
+
const hasBeatLevelMovieVolume = context.studio.script.beats.some((beat) => beat.audioParams?.movieVolume !== undefined);
|
|
261
|
+
return hasBeatLevelMovieVolume || audioParams.movieVolume !== undefined || audioParams.ttsVolume !== undefined || duckingAffectsMixMode;
|
|
262
|
+
};
|
|
263
|
+
export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
|
|
245
264
|
if (audioIdsFromMovieBeats.length > 0) {
|
|
246
265
|
const mainAudioId = "mainaudio";
|
|
247
266
|
const compositeAudioId = "composite";
|
|
248
267
|
const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
|
|
249
|
-
|
|
250
|
-
|
|
268
|
+
const useExplicitMix = isExplicitMixMode(context);
|
|
269
|
+
if (useExplicitMix) {
|
|
270
|
+
// Explicit mode: normalize=0 + limiter.
|
|
271
|
+
// ttsVolume is applied in addBGMAgent to avoid changing BGM level.
|
|
272
|
+
// Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
|
|
273
|
+
const mixedId = "mixed";
|
|
274
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
275
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
|
|
276
|
+
// Limiter as failsafe
|
|
277
|
+
ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
// Legacy mode: normalize=1 (current behavior, fully backward compatible)
|
|
281
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
282
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
|
|
283
|
+
}
|
|
251
284
|
return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
|
|
252
285
|
}
|
|
253
286
|
return artifactAudioId;
|
|
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
|
|
|
420
453
|
transitionVideoIds.push(transitionVideoId);
|
|
421
454
|
}
|
|
422
455
|
// NOTE: We don't support audio if the speed is not 1.0.
|
|
423
|
-
const movieVolume = beat
|
|
456
|
+
const movieVolume = resolveMovieVolume(beat, context);
|
|
424
457
|
if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
|
|
425
458
|
// TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
|
|
426
459
|
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
|
|
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
|
|
|
442
475
|
}
|
|
443
476
|
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
|
444
477
|
const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
|
|
445
|
-
const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
|
|
478
|
+
const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
|
|
446
479
|
await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
|
|
447
480
|
const endTime = performance.now();
|
|
448
481
|
GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
|
|
@@ -1,3 +1,13 @@
|
|
|
1
1
|
import type { AgentFunctionInfo } from "graphai";
|
|
2
|
+
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
|
|
4
|
+
useExplicitMix: boolean;
|
|
5
|
+
voiceVolume: number;
|
|
6
|
+
};
|
|
7
|
+
export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
|
|
8
|
+
amixNormalize: string;
|
|
9
|
+
mixedOutputId: string;
|
|
10
|
+
limiterFilter: string | undefined;
|
|
11
|
+
};
|
|
2
12
|
declare const addBGMAgentInfo: AgentFunctionInfo;
|
|
3
13
|
export default addBGMAgentInfo;
|
|
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
|
|
|
3
3
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
4
4
|
import { isFile } from "../utils/file.js";
|
|
5
5
|
import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
|
|
6
|
+
export const resolveAddBgmMixParams = (audioParams) => {
|
|
7
|
+
const useExplicitMix = audioParams.ttsVolume !== undefined;
|
|
8
|
+
const ttsVolume = audioParams.ttsVolume ?? 1.0;
|
|
9
|
+
return {
|
|
10
|
+
useExplicitMix,
|
|
11
|
+
voiceVolume: audioParams.audioVolume * ttsVolume,
|
|
12
|
+
};
|
|
13
|
+
};
|
|
14
|
+
export const resolveAddBgmFilterConfig = (useExplicitMix) => {
|
|
15
|
+
const amixNormalize = useExplicitMix ? ":normalize=0" : "";
|
|
16
|
+
return {
|
|
17
|
+
amixNormalize,
|
|
18
|
+
mixedOutputId: useExplicitMix ? "mixed_limited" : "mixed",
|
|
19
|
+
limiterFilter: useExplicitMix ? "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]" : undefined,
|
|
20
|
+
};
|
|
21
|
+
};
|
|
6
22
|
const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
7
23
|
const { voiceFile, outputFile, context } = namedInputs;
|
|
8
24
|
const { musicFile } = params;
|
|
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
|
24
40
|
const ffmpegContext = FfmpegContextInit();
|
|
25
41
|
const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
|
|
26
42
|
const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
ffmpegContext.filterComplex.push(`[
|
|
30
|
-
ffmpegContext.filterComplex.push(`[
|
|
43
|
+
const audioParams = context.presentationStyle.audioParams;
|
|
44
|
+
const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
|
|
45
|
+
ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
|
|
46
|
+
ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
|
|
47
|
+
const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
|
|
48
|
+
ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
|
|
49
|
+
if (limiterFilter) {
|
|
50
|
+
ffmpegContext.filterComplex.push(limiterFilter);
|
|
51
|
+
}
|
|
52
|
+
ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
|
|
31
53
|
ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
|
|
32
54
|
try {
|
|
33
55
|
await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
|
|
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
|
|
|
100
100
|
model,
|
|
101
101
|
prompt,
|
|
102
102
|
config: {
|
|
103
|
-
durationSeconds: capabilities?.
|
|
103
|
+
durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
|
|
104
104
|
aspectRatio,
|
|
105
105
|
personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
|
|
106
106
|
},
|
|
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
|
|
|
3
3
|
import Replicate from "replicate";
|
|
4
4
|
import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
|
|
5
5
|
import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
|
|
6
|
-
|
|
6
|
+
function replicate_get_videoUrl(output) {
|
|
7
|
+
if (typeof output === "string")
|
|
8
|
+
return output;
|
|
9
|
+
if (output && typeof output === "object" && "url" in output)
|
|
10
|
+
return output.url();
|
|
11
|
+
return undefined;
|
|
12
|
+
}
|
|
13
|
+
async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
|
|
7
14
|
const replicate = new Replicate({
|
|
8
15
|
auth: apiKey,
|
|
9
16
|
});
|
|
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
|
|
|
37
44
|
input.image = base64Image;
|
|
38
45
|
}
|
|
39
46
|
}
|
|
47
|
+
// Add reference images if provided and model supports it
|
|
48
|
+
const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
|
|
49
|
+
if (referenceImages && referenceImages.length > 0) {
|
|
50
|
+
if (!referenceImagesParam) {
|
|
51
|
+
GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
|
|
52
|
+
}
|
|
53
|
+
else if (imagePath) {
|
|
54
|
+
GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
input[referenceImagesParam] = referenceImages.map((ref) => {
|
|
58
|
+
const buffer = readFileSync(ref.imagePath);
|
|
59
|
+
return `data:image/png;base64,${buffer.toString("base64")}`;
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
}
|
|
40
63
|
// Add last frame image if provided and model supports it
|
|
41
64
|
if (lastFrameImagePath) {
|
|
42
65
|
const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
|
|
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
|
|
|
57
80
|
try {
|
|
58
81
|
const output = await replicate.run(model, { input });
|
|
59
82
|
// Download the generated video
|
|
60
|
-
|
|
61
|
-
|
|
83
|
+
// Some models return a FileOutput object with a url() method; others return a plain string URL.
|
|
84
|
+
const videoUrl = replicate_get_videoUrl(output);
|
|
85
|
+
if (videoUrl) {
|
|
62
86
|
const videoResponse = await fetch(videoUrl);
|
|
63
87
|
if (!videoResponse.ok) {
|
|
64
88
|
throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
|
|
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
|
|
|
89
113
|
return "9:16";
|
|
90
114
|
};
|
|
91
115
|
export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
92
|
-
const { prompt, imagePath, lastFrameImagePath } = namedInputs;
|
|
116
|
+
const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
|
|
93
117
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
94
118
|
const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
|
|
95
119
|
if (!provider2MovieAgent.replicate.modelParams[model]) {
|
|
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
|
110
134
|
});
|
|
111
135
|
}
|
|
112
136
|
try {
|
|
113
|
-
const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
|
|
137
|
+
const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
|
|
114
138
|
if (buffer) {
|
|
115
139
|
return { buffer };
|
|
116
140
|
}
|
|
@@ -179,6 +179,7 @@ export declare const MulmoPresentationStyleMethods: {
|
|
|
179
179
|
imageName: string;
|
|
180
180
|
referenceType: "ASSET" | "STYLE";
|
|
181
181
|
}[] | undefined;
|
|
182
|
+
concurrency?: number | undefined;
|
|
182
183
|
speed?: number | undefined;
|
|
183
184
|
};
|
|
184
185
|
keyName: string;
|
|
@@ -204,7 +205,10 @@ export declare const MulmoPresentationStyleMethods: {
|
|
|
204
205
|
image?: string;
|
|
205
206
|
}>;
|
|
206
207
|
};
|
|
207
|
-
|
|
208
|
+
/** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
|
|
209
|
+
getImageConcurrency(presentationStyle: MulmoPresentationStyle): number;
|
|
210
|
+
/** Concurrency for audio/TTS generation graph */
|
|
211
|
+
getAudioConcurrency(presentationStyle: MulmoPresentationStyle): number;
|
|
208
212
|
getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
|
|
209
213
|
getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
|
|
210
214
|
};
|
|
@@ -6,10 +6,10 @@
|
|
|
6
6
|
import { isNull } from "graphai";
|
|
7
7
|
import { userAssert } from "../utils/utils.js";
|
|
8
8
|
import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
|
|
9
|
-
import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
|
|
9
|
+
import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2TTSAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
|
|
10
10
|
const defaultTextSlideStyles = [
|
|
11
11
|
'*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
|
|
12
|
-
"body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
|
|
12
|
+
"body { margin: 60px; margin-top: 40px; color:#333; background-color:#fff; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
|
|
13
13
|
"h1 { font-size: 56px; margin-bottom: 20px; text-align: center }",
|
|
14
14
|
"h2 { font-size: 48px; text-align: center }",
|
|
15
15
|
"h3 { font-size: 36px }",
|
|
@@ -119,7 +119,17 @@ export const MulmoPresentationStyleMethods = {
|
|
|
119
119
|
const agentInfo = provider2LipSyncAgent[lipSyncProvider];
|
|
120
120
|
return agentInfo;
|
|
121
121
|
},
|
|
122
|
-
|
|
122
|
+
/** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
|
|
123
|
+
getImageConcurrency(presentationStyle) {
|
|
124
|
+
const imageConcurrency = presentationStyle.imageParams?.concurrency;
|
|
125
|
+
const movieConcurrency = presentationStyle.movieParams?.concurrency;
|
|
126
|
+
// User-specified concurrency takes precedence.
|
|
127
|
+
// Use the smaller of imageParams/movieParams since they share the same graph.
|
|
128
|
+
if (imageConcurrency !== undefined || movieConcurrency !== undefined) {
|
|
129
|
+
const values = [imageConcurrency, movieConcurrency].filter((v) => v !== undefined);
|
|
130
|
+
return Math.min(...values);
|
|
131
|
+
}
|
|
132
|
+
// Fallback: provider-based auto-detection
|
|
123
133
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
|
|
124
134
|
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
125
135
|
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
@@ -131,6 +141,20 @@ export const MulmoPresentationStyleMethods = {
|
|
|
131
141
|
}
|
|
132
142
|
return 4;
|
|
133
143
|
},
|
|
144
|
+
/** Concurrency for audio/TTS generation graph */
|
|
145
|
+
getAudioConcurrency(presentationStyle) {
|
|
146
|
+
// User-specified concurrency takes precedence
|
|
147
|
+
const userConcurrency = presentationStyle.audioParams?.concurrency;
|
|
148
|
+
if (userConcurrency !== undefined) {
|
|
149
|
+
return userConcurrency;
|
|
150
|
+
}
|
|
151
|
+
// Fallback: provider-based auto-detection
|
|
152
|
+
const hasLimitedConcurrencyProvider = Object.values(presentationStyle.speechParams.speakers).some((speaker) => {
|
|
153
|
+
const provider = text2SpeechProviderSchema.parse(speaker.provider);
|
|
154
|
+
return provider2TTSAgent[provider].hasLimitedConcurrency;
|
|
155
|
+
});
|
|
156
|
+
return hasLimitedConcurrencyProvider ? 1 : 8;
|
|
157
|
+
},
|
|
134
158
|
getHtmlImageAgentInfo(presentationStyle) {
|
|
135
159
|
const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
|
|
136
160
|
const defaultConfig = provider2LLMAgent[provider];
|
|
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
|
|
|
80
80
|
durations: number[];
|
|
81
81
|
start_image: string | undefined;
|
|
82
82
|
last_image?: string;
|
|
83
|
+
reference_images_param?: string;
|
|
83
84
|
price_per_sec: number;
|
|
84
85
|
}>;
|
|
85
86
|
};
|
|
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
|
|
|
90
91
|
keyName: string;
|
|
91
92
|
modelParams: Record<string, {
|
|
92
93
|
durations: number[];
|
|
94
|
+
supportsDuration: boolean;
|
|
93
95
|
supportsLastFrame: boolean;
|
|
94
96
|
supportsReferenceImages: boolean;
|
|
95
97
|
supportsPersonGeneration: boolean;
|