mulmocast 2.6.5 → 2.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/graph_option.d.ts +3 -0
- package/lib/actions/graph_option.js +18 -0
- package/lib/actions/image_references.js +1 -1
- package/lib/actions/images.d.ts +2 -3
- package/lib/actions/images.js +4 -17
- package/lib/actions/movie.d.ts +3 -0
- package/lib/actions/movie.js +38 -5
- package/lib/agents/add_bgm_agent.d.ts +10 -0
- package/lib/agents/add_bgm_agent.js +26 -4
- package/lib/agents/movie_genai_agent.js +1 -1
- package/lib/agents/movie_replicate_agent.js +29 -5
- package/lib/types/provider2agent.d.ts +2 -0
- package/lib/types/provider2agent.js +78 -5
- package/lib/types/schema.d.ts +34 -4
- package/lib/types/schema.js +9 -1
- package/lib/utils/context.d.ts +17 -2
- package/lib/utils/image_plugins/html_tailwind.d.ts +5 -0
- package/lib/utils/image_plugins/html_tailwind.js +56 -5
- package/package.json +8 -7
- package/scripts/test/fixtures/movie_tone_high.mov +0 -0
- package/scripts/test/fixtures/movie_tone_low.mov +0 -0
- package/scripts/test/fixtures/movie_tone_mid.mov +0 -0
- package/scripts/test/glb/sample_2026-03-15T172907.296_compat.glb +0 -0
- package/scripts/test/test_audio_mix.json +91 -0
- package/scripts/test/test_audio_mix_beat_vol.json +100 -0
- package/scripts/test/test_audio_mix_ducking.json +91 -0
- package/scripts/test/test_audio_mix_legacy.json +90 -0
- package/scripts/test/test_grok.json +57 -0
- package/scripts/test/test_image_references.json +74 -0
- package/scripts/test/test_kling_v3.json +54 -0
- package/scripts/test/test_kling_v3_omni.json +54 -0
- package/scripts/test/test_lipsync2.json +48 -52
- package/scripts/test/test_lipsync5.json +66 -0
- package/scripts/test/test_runway.json +54 -0
- package/scripts/test/test_threejs.json +241 -0
- package/scripts/test/test_threejs_glb.json +154 -0
- package/scripts/test/test_veo31_lite.json +39 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { TaskManager } from "graphai";
|
|
2
|
+
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
3
|
+
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
4
|
+
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
5
|
+
/**
 * Assembles the options object shared by the image/movie generation graphs.
 *
 * The returned object carries three fields:
 *  - `agentFilters`: a single `fileCacheAgentFilter` entry restricted to the
 *    generator node ids listed below;
 *  - `taskManager`: a `TaskManager` sized by the concurrency that
 *    `MulmoPresentationStyleMethods.getConcurrency` reports for the
 *    context's presentation style;
 *  - `config`: the result of `settings2GraphAIConfig(settings, process.env)`.
 *
 * @param context  studio context; only `context.presentationStyle` is read here
 * @param settings forwarded untouched to `settings2GraphAIConfig`
 * @returns a promise resolving to the options object
 */
export const graphOption = async (context, settings) => {
    const concurrency = MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle);
    const cacheFilter = {
        name: "fileCacheAgentFilter",
        agent: fileCacheAgentFilter,
        nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
    };
    return {
        agentFilters: [cacheFilter],
        taskManager: new TaskManager(concurrency),
        config: settings2GraphAIConfig(settings, process.env),
    };
};
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
2
|
import { getReferenceImagePath } from "../utils/file.js";
|
|
3
|
-
import { graphOption } from "./
|
|
3
|
+
import { graphOption } from "./graph_option.js";
|
|
4
4
|
import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
|
|
5
5
|
import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
|
|
6
6
|
import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { GraphData } from "graphai";
|
|
2
2
|
import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
|
|
3
3
|
export declare const beat_graph_data: {
|
|
4
4
|
version: number;
|
|
@@ -463,7 +463,7 @@ export declare const beat_graph_data: {
|
|
|
463
463
|
};
|
|
464
464
|
};
|
|
465
465
|
export declare const images_graph_data: GraphData;
|
|
466
|
-
export
|
|
466
|
+
export { graphOption } from "./graph_option.js";
|
|
467
467
|
type ImageOptions = {
|
|
468
468
|
imageAgents: Record<string, unknown>;
|
|
469
469
|
};
|
|
@@ -481,4 +481,3 @@ export declare const generateBeatImage: (inputs: {
|
|
|
481
481
|
withBackup?: boolean;
|
|
482
482
|
};
|
|
483
483
|
}) => Promise<void>;
|
|
484
|
-
export {};
|
package/lib/actions/images.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
|
-
import { GraphAI, GraphAILogger
|
|
3
|
+
import { GraphAI, GraphAILogger } from "graphai";
|
|
4
4
|
import { AuthenticationError, RateLimitError } from "openai/index.js";
|
|
5
5
|
import * as vanilla from "@graphai/vanilla";
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
@@ -10,12 +10,11 @@ import { imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, movieGenAIAgent
|
|
|
10
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
11
|
import { agentIncorrectAPIKeyError, agentAPIRateLimitError, imageAction, imageFileTarget } from "../utils/error_cause.js";
|
|
12
12
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
13
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
14
|
-
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
15
13
|
import { audioCheckerError } from "../utils/error_cause.js";
|
|
16
14
|
import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
|
|
17
15
|
import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
|
|
18
16
|
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
+
import { graphOption } from "./graph_option.js";
|
|
19
18
|
const vanillaAgents = vanilla.default ?? vanilla;
|
|
20
19
|
const imageAgents = {
|
|
21
20
|
imageGenAIAgent,
|
|
@@ -432,20 +431,8 @@ export const images_graph_data = {
|
|
|
432
431
|
},
|
|
433
432
|
},
|
|
434
433
|
};
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
agentFilters: [
|
|
438
|
-
{
|
|
439
|
-
name: "fileCacheAgentFilter",
|
|
440
|
-
agent: fileCacheAgentFilter,
|
|
441
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
|
|
442
|
-
},
|
|
443
|
-
],
|
|
444
|
-
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
445
|
-
config: settings2GraphAIConfig(settings, process.env),
|
|
446
|
-
};
|
|
447
|
-
return options;
|
|
448
|
-
};
|
|
434
|
+
// graphOption moved to graph_option.ts to break circular dependency with image_references.ts
|
|
435
|
+
export { graphOption } from "./graph_option.js";
|
|
449
436
|
const prepareGenerateImages = async (context) => {
|
|
450
437
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
451
438
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -13,6 +13,9 @@ export declare const getOutOverlayCoords: (transitionType: string, d: number, t:
|
|
|
13
13
|
export declare const getInOverlayCoords: (transitionType: string, d: number, t: number) => string;
|
|
14
14
|
export declare const getNeedFirstFrame: (context: MulmoStudioContext) => boolean[];
|
|
15
15
|
export declare const getNeedLastFrame: (context: MulmoStudioContext) => boolean[];
|
|
16
|
+
export declare const resolveMovieVolume: (beat: MulmoBeat, context: MulmoStudioContext) => number;
|
|
17
|
+
export declare const isExplicitMixMode: (context: MulmoStudioContext) => boolean;
|
|
18
|
+
export declare const mixAudiosFromMovieBeats: (ffmpegContext: FfmpegContext, artifactAudioId: string, audioIdsFromMovieBeats: string[], context: MulmoStudioContext) => string;
|
|
16
19
|
export declare const getExtraPadding: (context: MulmoStudioContext, index: number) => number;
|
|
17
20
|
export declare const getFillOption: (context: MulmoStudioContext, beat: MulmoBeat) => {
|
|
18
21
|
style: "aspectFit" | "aspectFill";
|
package/lib/actions/movie.js
CHANGED
|
@@ -9,6 +9,7 @@ import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
|
|
|
9
9
|
// const isMac = process.platform === "darwin";
|
|
10
10
|
const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
|
|
11
11
|
const VIDEO_FPS = 30;
|
|
12
|
+
const DEFAULT_DUCKING_RATIO = 0.3;
|
|
12
13
|
export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
|
|
13
14
|
const videoId = `v${inputIndex}`;
|
|
14
15
|
const videoFilters = [];
|
|
@@ -241,13 +242,45 @@ export const getNeedLastFrame = (context) => {
|
|
|
241
242
|
return nextTransition !== null; // Any transition on next beat requires this beat's last frame
|
|
242
243
|
});
|
|
243
244
|
};
|
|
244
|
-
const
|
|
245
|
+
/**
 * Determines the volume to apply to a beat's own movie audio track.
 *
 * The base volume is the beat-level `audioParams.movieVolume`, falling back
 * to the presentation-level `audioParams.movieVolume`, and finally to 1.0.
 * When a `ducking` config is present and the beat carries speech (non-empty
 * `text` while `suppressSpeech` is off), the base volume is multiplied by
 * `ducking.ratio`, or by `DEFAULT_DUCKING_RATIO` when no ratio is given.
 *
 * @param beat    the beat whose movie volume is being resolved
 * @param context studio context providing `presentationStyle.audioParams`
 * @returns the resolved volume multiplier
 */
export const resolveMovieVolume = (beat, context) => {
    const styleAudio = context.presentationStyle.audioParams;
    const baseVolume = beat.audioParams?.movieVolume ?? styleAudio.movieVolume ?? 1.0;
    const duckingConfig = styleAudio.ducking;
    const speechPresent = Boolean(beat.text) && !styleAudio.suppressSpeech;
    if (!duckingConfig || !speechPresent) {
        // No ducking configured, or nothing is spoken over this beat.
        return baseVolume;
    }
    return baseVolume * (duckingConfig.ratio ?? DEFAULT_DUCKING_RATIO);
};
|
|
255
|
+
/**
 * Reports whether the script opts in to "explicit" audio mixing.
 *
 * Explicit mode is selected when any of the following holds:
 *  - at least one beat sets `audioParams.movieVolume`;
 *  - the presentation-level `movieVolume` or `ttsVolume` is set;
 *  - `ducking` is configured and speech is not suppressed
 *    (`suppressSpeech` is not exactly `true`).
 *
 * @param context studio context; reads `presentationStyle.audioParams`
 *                and `studio.script.beats`
 * @returns `true` for explicit mode, `false` otherwise
 */
export const isExplicitMixMode = (context) => {
    const { movieVolume, ttsVolume, ducking, suppressSpeech } = context.presentationStyle.audioParams;
    // Always scan the beats, exactly once, before looking at the global settings.
    const beatOverridesMovieVolume = context.studio.script.beats.some(({ audioParams }) => audioParams?.movieVolume !== undefined);
    if (beatOverridesMovieVolume || movieVolume !== undefined || ttsVolume !== undefined) {
        return true;
    }
    // Ducking only matters when there is speech to duck under.
    return ducking !== undefined && suppressSpeech !== true;
};
|
|
263
|
+
export const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats, context) => {
|
|
245
264
|
if (audioIdsFromMovieBeats.length > 0) {
|
|
246
265
|
const mainAudioId = "mainaudio";
|
|
247
266
|
const compositeAudioId = "composite";
|
|
248
267
|
const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
|
|
249
|
-
|
|
250
|
-
|
|
268
|
+
const useExplicitMix = isExplicitMixMode(context);
|
|
269
|
+
if (useExplicitMix) {
|
|
270
|
+
// Explicit mode: normalize=0 + limiter.
|
|
271
|
+
// ttsVolume is applied in addBGMAgent to avoid changing BGM level.
|
|
272
|
+
// Ducking is handled at beat level (movieVolume is already adjusted per beat in createVideo)
|
|
273
|
+
const mixedId = "mixed";
|
|
274
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
275
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2:normalize=0[${mixedId}]`);
|
|
276
|
+
// Limiter as failsafe
|
|
277
|
+
ffmpegContext.filterComplex.push(`[${mixedId}]alimiter=limit=0.95:attack=5:release=50[${compositeAudioId}]`);
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
// Legacy mode: normalize=1 (current behavior, fully backward compatible)
|
|
281
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
282
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
|
|
283
|
+
}
|
|
251
284
|
return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
|
|
252
285
|
}
|
|
253
286
|
return artifactAudioId;
|
|
@@ -420,7 +453,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
|
|
|
420
453
|
transitionVideoIds.push(transitionVideoId);
|
|
421
454
|
}
|
|
422
455
|
// NOTE: We don't support audio if the speed is not 1.0.
|
|
423
|
-
const movieVolume = beat
|
|
456
|
+
const movieVolume = resolveMovieVolume(beat, context);
|
|
424
457
|
if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
|
|
425
458
|
// TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
|
|
426
459
|
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
|
|
@@ -442,7 +475,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
|
|
|
442
475
|
}
|
|
443
476
|
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
|
444
477
|
const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
|
|
445
|
-
const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats);
|
|
478
|
+
const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, `${audioIndex}:a`, audioIdsFromMovieBeats, context);
|
|
446
479
|
await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
|
|
447
480
|
const endTime = performance.now();
|
|
448
481
|
GraphAILogger.info(`Video created successfully! ${Math.round(endTime - start) / 1000} sec`);
|
|
@@ -1,3 +1,13 @@
|
|
|
1
1
|
import type { AgentFunctionInfo } from "graphai";
|
|
2
|
+
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
export declare const resolveAddBgmMixParams: (audioParams: MulmoStudioContext["presentationStyle"]["audioParams"]) => {
|
|
4
|
+
useExplicitMix: boolean;
|
|
5
|
+
voiceVolume: number;
|
|
6
|
+
};
|
|
7
|
+
export declare const resolveAddBgmFilterConfig: (useExplicitMix: boolean) => {
|
|
8
|
+
amixNormalize: string;
|
|
9
|
+
mixedOutputId: string;
|
|
10
|
+
limiterFilter: string | undefined;
|
|
11
|
+
};
|
|
2
12
|
declare const addBGMAgentInfo: AgentFunctionInfo;
|
|
3
13
|
export default addBGMAgentInfo;
|
|
@@ -3,6 +3,22 @@ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput,
|
|
|
3
3
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
4
4
|
import { isFile } from "../utils/file.js";
|
|
5
5
|
import { agentGenerationError, agentFileNotExistError, audioAction, audioFileTarget } from "../utils/error_cause.js";
|
|
6
|
+
/**
 * Derives the mixing parameters used when voice is combined with BGM.
 *
 * Explicit mixing is enabled only when `ttsVolume` is set. The voice volume
 * is `audioVolume` scaled by `ttsVolume`, with 1.0 used when `ttsVolume`
 * is absent.
 *
 * @param audioParams presentation-level audio parameters
 * @returns `{ useExplicitMix, voiceVolume }`
 */
export const resolveAddBgmMixParams = (audioParams) => {
    const { audioVolume, ttsVolume } = audioParams;
    return {
        useExplicitMix: ttsVolume !== undefined,
        voiceVolume: audioVolume * (ttsVolume ?? 1.0),
    };
};
|
|
14
|
+
/**
 * Selects the ffmpeg filter fragments for the voice + BGM mix.
 *
 * In explicit mode the amix suffix is `:normalize=0`, a limiter filter
 * reading `[mixed]` and writing `[mixed_limited]` is supplied, and
 * `mixedOutputId` names the limiter's output. Otherwise the suffix is empty,
 * no limiter is supplied, and `mixedOutputId` is the plain `mixed` label.
 *
 * @param useExplicitMix whether explicit mixing is enabled
 * @returns `{ amixNormalize, mixedOutputId, limiterFilter }`
 */
export const resolveAddBgmFilterConfig = (useExplicitMix) => {
    if (useExplicitMix) {
        return {
            amixNormalize: ":normalize=0",
            mixedOutputId: "mixed_limited",
            limiterFilter: "[mixed]alimiter=limit=0.95:attack=5:release=50[mixed_limited]",
        };
    }
    return {
        amixNormalize: "",
        mixedOutputId: "mixed",
        limiterFilter: undefined,
    };
};
|
|
6
22
|
const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
7
23
|
const { voiceFile, outputFile, context } = namedInputs;
|
|
8
24
|
const { musicFile } = params;
|
|
@@ -24,10 +40,16 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
|
24
40
|
const ffmpegContext = FfmpegContextInit();
|
|
25
41
|
const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
|
|
26
42
|
const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
ffmpegContext.filterComplex.push(`[
|
|
30
|
-
ffmpegContext.filterComplex.push(`[
|
|
43
|
+
const audioParams = context.presentationStyle.audioParams;
|
|
44
|
+
const { useExplicitMix, voiceVolume } = resolveAddBgmMixParams(audioParams);
|
|
45
|
+
ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${audioParams.bgmVolume}[music]`);
|
|
46
|
+
ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${voiceVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
|
|
47
|
+
const { amixNormalize, mixedOutputId, limiterFilter } = resolveAddBgmFilterConfig(useExplicitMix);
|
|
48
|
+
ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest${amixNormalize}[mixed]`);
|
|
49
|
+
if (limiterFilter) {
|
|
50
|
+
ffmpegContext.filterComplex.push(limiterFilter);
|
|
51
|
+
}
|
|
52
|
+
ffmpegContext.filterComplex.push(`[${mixedOutputId}]atrim=start=0:end=${totalDuration}[trimmed]`);
|
|
31
53
|
ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
|
|
32
54
|
try {
|
|
33
55
|
await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
|
|
@@ -100,7 +100,7 @@ const generateStandardVideo = async (ai, model, prompt, aspectRatio, imagePath,
|
|
|
100
100
|
model,
|
|
101
101
|
prompt,
|
|
102
102
|
config: {
|
|
103
|
-
durationSeconds: capabilities?.
|
|
103
|
+
durationSeconds: capabilities?.supportsDuration === false ? undefined : duration,
|
|
104
104
|
aspectRatio,
|
|
105
105
|
personGeneration: imagePath || !capabilities?.supportsPersonGeneration ? undefined : PersonGeneration.ALLOW_ALL,
|
|
106
106
|
},
|
|
@@ -3,7 +3,14 @@ import { GraphAILogger } from "graphai";
|
|
|
3
3
|
import Replicate from "replicate";
|
|
4
4
|
import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
|
|
5
5
|
import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
|
|
6
|
-
|
|
6
|
+
/**
 * Extracts a downloadable URL from the value resolved by `replicate.run`.
 *
 * Accepted shapes:
 *  - a plain string, returned unchanged;
 *  - an object exposing a `url()` method, whose return value is passed through;
 *  - an object carrying a string `url` property;
 *  - an array of any of the above; the first entry that yields a URL wins.
 *
 * An object whose `url` member is neither callable nor a string is treated
 * as "no URL" rather than being invoked, which would throw a TypeError.
 *
 * @param output value resolved by `replicate.run`
 * @returns the URL, or `undefined` when none can be derived
 */
function replicate_get_videoUrl(output) {
    // Normalise to a list so single values and array outputs share one path.
    const candidates = Array.isArray(output) ? output : [output];
    for (const candidate of candidates) {
        if (typeof candidate === "string")
            return candidate;
        if (candidate && typeof candidate === "object" && "url" in candidate) {
            // Call through the object so `this` stays bound for the method.
            if (typeof candidate.url === "function")
                return candidate.url();
            if (typeof candidate.url === "string")
                return candidate.url;
        }
    }
    return undefined;
}
|
|
13
|
+
async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
|
|
7
14
|
const replicate = new Replicate({
|
|
8
15
|
auth: apiKey,
|
|
9
16
|
});
|
|
@@ -37,6 +44,22 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
|
|
|
37
44
|
input.image = base64Image;
|
|
38
45
|
}
|
|
39
46
|
}
|
|
47
|
+
// Add reference images if provided and model supports it
|
|
48
|
+
const referenceImagesParam = provider2MovieAgent.replicate.modelParams[model]?.reference_images_param;
|
|
49
|
+
if (referenceImages && referenceImages.length > 0) {
|
|
50
|
+
if (!referenceImagesParam) {
|
|
51
|
+
GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support referenceImages — ignoring`);
|
|
52
|
+
}
|
|
53
|
+
else if (imagePath) {
|
|
54
|
+
GraphAILogger.warn(`movieReplicateAgent: referenceImages cannot be combined with first frame image — ignoring referenceImages`);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
input[referenceImagesParam] = referenceImages.map((ref) => {
|
|
58
|
+
const buffer = readFileSync(ref.imagePath);
|
|
59
|
+
return `data:image/png;base64,${buffer.toString("base64")}`;
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
}
|
|
40
63
|
// Add last frame image if provided and model supports it
|
|
41
64
|
if (lastFrameImagePath) {
|
|
42
65
|
const lastImageParam = provider2MovieAgent.replicate.modelParams[model]?.last_image;
|
|
@@ -57,8 +80,9 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
|
|
|
57
80
|
try {
|
|
58
81
|
const output = await replicate.run(model, { input });
|
|
59
82
|
// Download the generated video
|
|
60
|
-
|
|
61
|
-
|
|
83
|
+
// Some models return a FileOutput object with a url() method; others return a plain string URL.
|
|
84
|
+
const videoUrl = replicate_get_videoUrl(output);
|
|
85
|
+
if (videoUrl) {
|
|
62
86
|
const videoResponse = await fetch(videoUrl);
|
|
63
87
|
if (!videoResponse.ok) {
|
|
64
88
|
throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`, {
|
|
@@ -89,7 +113,7 @@ export const getAspectRatio = (canvasSize) => {
|
|
|
89
113
|
return "9:16";
|
|
90
114
|
};
|
|
91
115
|
export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
92
|
-
const { prompt, imagePath, lastFrameImagePath } = namedInputs;
|
|
116
|
+
const { prompt, imagePath, lastFrameImagePath, referenceImages } = namedInputs;
|
|
93
117
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
94
118
|
const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
|
|
95
119
|
if (!provider2MovieAgent.replicate.modelParams[model]) {
|
|
@@ -110,7 +134,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
|
110
134
|
});
|
|
111
135
|
}
|
|
112
136
|
try {
|
|
113
|
-
const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, aspectRatio, duration);
|
|
137
|
+
const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
|
|
114
138
|
if (buffer) {
|
|
115
139
|
return { buffer };
|
|
116
140
|
}
|
|
@@ -80,6 +80,7 @@ export declare const provider2MovieAgent: {
|
|
|
80
80
|
durations: number[];
|
|
81
81
|
start_image: string | undefined;
|
|
82
82
|
last_image?: string;
|
|
83
|
+
reference_images_param?: string;
|
|
83
84
|
price_per_sec: number;
|
|
84
85
|
}>;
|
|
85
86
|
};
|
|
@@ -90,6 +91,7 @@ export declare const provider2MovieAgent: {
|
|
|
90
91
|
keyName: string;
|
|
91
92
|
modelParams: Record<string, {
|
|
92
93
|
durations: number[];
|
|
94
|
+
supportsDuration: boolean;
|
|
93
95
|
supportsLastFrame: boolean;
|
|
94
96
|
supportsReferenceImages: boolean;
|
|
95
97
|
supportsPersonGeneration: boolean;
|
|
@@ -92,6 +92,9 @@ export const provider2MovieAgent = {
|
|
|
92
92
|
"kwaivgi/kling-v2.1-master",
|
|
93
93
|
"google/veo-2",
|
|
94
94
|
"google/veo-3",
|
|
95
|
+
"google/veo-3.1",
|
|
96
|
+
"google/veo-3.1-fast",
|
|
97
|
+
"google/veo-3.1-lite",
|
|
95
98
|
"google/veo-3-fast",
|
|
96
99
|
"minimax/video-01",
|
|
97
100
|
"minimax/hailuo-02",
|
|
@@ -99,6 +102,11 @@ export const provider2MovieAgent = {
|
|
|
99
102
|
"pixverse/pixverse-v4.5",
|
|
100
103
|
"wan-video/wan-2.2-i2v-fast",
|
|
101
104
|
"wan-video/wan-2.2-t2v-fast",
|
|
105
|
+
"xai/grok-imagine-video",
|
|
106
|
+
"xai/grok-imagine-r2v",
|
|
107
|
+
"runwayml/gen-4.5",
|
|
108
|
+
"kwaivgi/kling-v3-omni-video",
|
|
109
|
+
"kwaivgi/kling-v3-video",
|
|
102
110
|
],
|
|
103
111
|
modelParams: {
|
|
104
112
|
"bytedance/seedance-1-lite": {
|
|
@@ -138,6 +146,25 @@ export const provider2MovieAgent = {
|
|
|
138
146
|
start_image: "image",
|
|
139
147
|
price_per_sec: 0.75,
|
|
140
148
|
},
|
|
149
|
+
"google/veo-3.1": {
|
|
150
|
+
durations: [4, 6, 8],
|
|
151
|
+
start_image: "image",
|
|
152
|
+
last_image: "last_frame_image",
|
|
153
|
+
reference_images_param: "reference_images",
|
|
154
|
+
price_per_sec: 0.75,
|
|
155
|
+
},
|
|
156
|
+
"google/veo-3.1-fast": {
|
|
157
|
+
durations: [4, 6, 8],
|
|
158
|
+
start_image: "image",
|
|
159
|
+
last_image: "last_frame_image",
|
|
160
|
+
price_per_sec: 0.4,
|
|
161
|
+
},
|
|
162
|
+
"google/veo-3.1-lite": {
|
|
163
|
+
durations: [4, 6, 8],
|
|
164
|
+
start_image: "image",
|
|
165
|
+
last_image: "last_frame",
|
|
166
|
+
price_per_sec: 0.05,
|
|
167
|
+
},
|
|
141
168
|
"google/veo-3-fast": {
|
|
142
169
|
durations: [8],
|
|
143
170
|
start_image: "image",
|
|
@@ -175,28 +202,68 @@ export const provider2MovieAgent = {
|
|
|
175
202
|
start_image: undefined,
|
|
176
203
|
price_per_sec: 0.012,
|
|
177
204
|
},
|
|
205
|
+
"xai/grok-imagine-video": {
|
|
206
|
+
durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
207
|
+
start_image: "image",
|
|
208
|
+
price_per_sec: 0.08,
|
|
209
|
+
},
|
|
210
|
+
"xai/grok-imagine-r2v": {
|
|
211
|
+
durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
|
212
|
+
start_image: undefined,
|
|
213
|
+
reference_images_param: "reference_images",
|
|
214
|
+
price_per_sec: 0.08,
|
|
215
|
+
},
|
|
216
|
+
"runwayml/gen-4.5": {
|
|
217
|
+
durations: [5, 10],
|
|
218
|
+
start_image: "image",
|
|
219
|
+
price_per_sec: 0.25,
|
|
220
|
+
},
|
|
221
|
+
"kwaivgi/kling-v3-omni-video": {
|
|
222
|
+
durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
223
|
+
start_image: "start_image",
|
|
224
|
+
last_image: "end_image",
|
|
225
|
+
reference_images_param: "reference_images",
|
|
226
|
+
price_per_sec: 0.3,
|
|
227
|
+
},
|
|
228
|
+
"kwaivgi/kling-v3-video": {
|
|
229
|
+
durations: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
|
230
|
+
start_image: "start_image",
|
|
231
|
+
last_image: "end_image",
|
|
232
|
+
reference_images_param: "reference_images",
|
|
233
|
+
price_per_sec: 0.3,
|
|
234
|
+
},
|
|
178
235
|
},
|
|
179
236
|
},
|
|
180
237
|
google: {
|
|
181
238
|
agentName: "movieGenAIAgent",
|
|
182
239
|
defaultModel: "veo-2.0-generate-001",
|
|
183
|
-
models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
|
|
240
|
+
models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-lite-generate-preview"],
|
|
184
241
|
keyName: "GEMINI_API_KEY",
|
|
185
242
|
modelParams: {
|
|
243
|
+
"veo-3.1-lite-generate-preview": {
|
|
244
|
+
durations: [4, 6, 8],
|
|
245
|
+
supportsDuration: true,
|
|
246
|
+
supportsLastFrame: true,
|
|
247
|
+
supportsReferenceImages: false,
|
|
248
|
+
supportsPersonGeneration: false,
|
|
249
|
+
},
|
|
186
250
|
"veo-3.1-generate-preview": {
|
|
187
251
|
durations: [4, 6, 8],
|
|
252
|
+
supportsDuration: true,
|
|
188
253
|
supportsLastFrame: true,
|
|
189
254
|
supportsReferenceImages: true,
|
|
190
255
|
supportsPersonGeneration: false,
|
|
191
256
|
},
|
|
192
257
|
"veo-3.0-generate-001": {
|
|
193
|
-
durations: [
|
|
258
|
+
durations: [8],
|
|
259
|
+
supportsDuration: false, // Veo 3.0 always generates 8s
|
|
194
260
|
supportsLastFrame: false,
|
|
195
261
|
supportsReferenceImages: false,
|
|
196
262
|
supportsPersonGeneration: false,
|
|
197
263
|
},
|
|
198
264
|
"veo-2.0-generate-001": {
|
|
199
|
-
durations: [5, 6,
|
|
265
|
+
durations: [5, 6, 8],
|
|
266
|
+
supportsDuration: true,
|
|
200
267
|
supportsLastFrame: false, // Vertex AI only
|
|
201
268
|
supportsReferenceImages: false,
|
|
202
269
|
supportsPersonGeneration: true,
|
|
@@ -229,7 +296,7 @@ export const provider2LipSyncAgent = {
|
|
|
229
296
|
agentName: "lipSyncReplicateAgent",
|
|
230
297
|
defaultModel: "bytedance/omni-human",
|
|
231
298
|
keyName: "REPLICATE_API_TOKEN",
|
|
232
|
-
models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
|
|
299
|
+
models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human", "pixverse/lipsync"],
|
|
233
300
|
modelParams: {
|
|
234
301
|
"bytedance/latentsync": {
|
|
235
302
|
identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
|
|
@@ -247,14 +314,20 @@ export const provider2LipSyncAgent = {
|
|
|
247
314
|
audio: "audio",
|
|
248
315
|
price_per_sec: 0.14,
|
|
249
316
|
},
|
|
317
|
+
"pixverse/lipsync": {
|
|
318
|
+
identifier: "pixverse/lipsync:3ca6d73f4fb9e1d77a4b6e14f8998ee18926e4dc462838e31fa2bb5e662c1e2c",
|
|
319
|
+
video: "video",
|
|
320
|
+
audio: "audio",
|
|
321
|
+
},
|
|
250
322
|
/* NOTE: This model does not work with large base64 urls.
|
|
251
323
|
"sync/lipsync-2": {
|
|
252
324
|
video: "video",
|
|
253
325
|
audio: "audio",
|
|
254
326
|
},
|
|
255
327
|
*/
|
|
256
|
-
/* NOTE: This model does not work
|
|
328
|
+
/* NOTE: This model does not work with base64 data URIs (error 1201).
|
|
257
329
|
"kwaivgi/kling-lip-sync": {
|
|
330
|
+
identifier: "kwaivgi/kling-lip-sync:8311467f07043d4b3feb44584d2586bfa2fc70203eca612ed26f84d0b55df3ce",
|
|
258
331
|
video: "video_url",
|
|
259
332
|
audio: "audio_file",
|
|
260
333
|
},
|
package/lib/types/schema.d.ts
CHANGED
|
@@ -3370,7 +3370,7 @@ export declare const mulmoSlideParamsSchema: z.ZodObject<{
|
|
|
3370
3370
|
}, z.core.$strict>;
|
|
3371
3371
|
export declare const beatAudioParamsSchema: z.ZodObject<{
|
|
3372
3372
|
padding: z.ZodOptional<z.ZodNumber>;
|
|
3373
|
-
movieVolume: z.
|
|
3373
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
3374
3374
|
}, z.core.$strict>;
|
|
3375
3375
|
export declare const mulmoHtmlImageParamsSchema: z.ZodObject<{
|
|
3376
3376
|
model: z.ZodOptional<z.ZodString>;
|
|
@@ -3393,6 +3393,11 @@ export declare const audioParamsSchema: z.ZodObject<{
|
|
|
3393
3393
|
bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
3394
3394
|
audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
3395
3395
|
suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
3396
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
3397
|
+
ttsVolume: z.ZodOptional<z.ZodNumber>;
|
|
3398
|
+
ducking: z.ZodOptional<z.ZodObject<{
|
|
3399
|
+
ratio: z.ZodOptional<z.ZodNumber>;
|
|
3400
|
+
}, z.core.$strip>>;
|
|
3396
3401
|
}, z.core.$strict>;
|
|
3397
3402
|
export declare const htmlPromptParamsSchema: z.ZodObject<{
|
|
3398
3403
|
systemPrompt: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
@@ -6336,7 +6341,7 @@ export declare const mulmoBeatSchema: z.ZodObject<{
|
|
|
6336
6341
|
}, z.core.$strict>>;
|
|
6337
6342
|
audioParams: z.ZodOptional<z.ZodObject<{
|
|
6338
6343
|
padding: z.ZodOptional<z.ZodNumber>;
|
|
6339
|
-
movieVolume: z.
|
|
6344
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
6340
6345
|
}, z.core.$strict>>;
|
|
6341
6346
|
movieParams: z.ZodOptional<z.ZodObject<{
|
|
6342
6347
|
provider: z.ZodOptional<z.ZodEnum<{
|
|
@@ -7181,6 +7186,11 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
|
|
|
7181
7186
|
bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
7182
7187
|
audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
7183
7188
|
suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
7189
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
7190
|
+
ttsVolume: z.ZodOptional<z.ZodNumber>;
|
|
7191
|
+
ducking: z.ZodOptional<z.ZodObject<{
|
|
7192
|
+
ratio: z.ZodOptional<z.ZodNumber>;
|
|
7193
|
+
}, z.core.$strip>>;
|
|
7184
7194
|
}, z.core.$strict>>;
|
|
7185
7195
|
}, z.core.$strip>;
|
|
7186
7196
|
export declare const mulmoReferenceSchema: z.ZodObject<{
|
|
@@ -7677,6 +7687,11 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
7677
7687
|
bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
7678
7688
|
audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
7679
7689
|
suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
7690
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
7691
|
+
ttsVolume: z.ZodOptional<z.ZodNumber>;
|
|
7692
|
+
ducking: z.ZodOptional<z.ZodObject<{
|
|
7693
|
+
ratio: z.ZodOptional<z.ZodNumber>;
|
|
7694
|
+
}, z.core.$strip>>;
|
|
7680
7695
|
}, z.core.$strict>>;
|
|
7681
7696
|
title: z.ZodOptional<z.ZodString>;
|
|
7682
7697
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -10345,7 +10360,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
10345
10360
|
}, z.core.$strict>>;
|
|
10346
10361
|
audioParams: z.ZodOptional<z.ZodObject<{
|
|
10347
10362
|
padding: z.ZodOptional<z.ZodNumber>;
|
|
10348
|
-
movieVolume: z.
|
|
10363
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
10349
10364
|
}, z.core.$strict>>;
|
|
10350
10365
|
movieParams: z.ZodOptional<z.ZodObject<{
|
|
10351
10366
|
provider: z.ZodOptional<z.ZodEnum<{
|
|
@@ -11265,6 +11280,11 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
11265
11280
|
bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
11266
11281
|
audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
11267
11282
|
suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
11283
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
11284
|
+
ttsVolume: z.ZodOptional<z.ZodNumber>;
|
|
11285
|
+
ducking: z.ZodOptional<z.ZodObject<{
|
|
11286
|
+
ratio: z.ZodOptional<z.ZodNumber>;
|
|
11287
|
+
}, z.core.$strip>>;
|
|
11268
11288
|
}, z.core.$strict>>;
|
|
11269
11289
|
title: z.ZodOptional<z.ZodString>;
|
|
11270
11290
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -13933,7 +13953,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
13933
13953
|
}, z.core.$strict>>;
|
|
13934
13954
|
audioParams: z.ZodOptional<z.ZodObject<{
|
|
13935
13955
|
padding: z.ZodOptional<z.ZodNumber>;
|
|
13936
|
-
movieVolume: z.
|
|
13956
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
13937
13957
|
}, z.core.$strict>>;
|
|
13938
13958
|
movieParams: z.ZodOptional<z.ZodObject<{
|
|
13939
13959
|
provider: z.ZodOptional<z.ZodEnum<{
|
|
@@ -14789,6 +14809,11 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
|
|
|
14789
14809
|
bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
14790
14810
|
audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
14791
14811
|
suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
14812
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
14813
|
+
ttsVolume: z.ZodOptional<z.ZodNumber>;
|
|
14814
|
+
ducking: z.ZodOptional<z.ZodObject<{
|
|
14815
|
+
ratio: z.ZodOptional<z.ZodNumber>;
|
|
14816
|
+
}, z.core.$strip>>;
|
|
14792
14817
|
}, z.core.$strict>>;
|
|
14793
14818
|
}, z.core.$strip>>;
|
|
14794
14819
|
}, z.core.$strict>;
|
|
@@ -15279,6 +15304,11 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
|
|
|
15279
15304
|
bgmVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
15280
15305
|
audioVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
15281
15306
|
suppressSpeech: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
15307
|
+
movieVolume: z.ZodOptional<z.ZodNumber>;
|
|
15308
|
+
ttsVolume: z.ZodOptional<z.ZodNumber>;
|
|
15309
|
+
ducking: z.ZodOptional<z.ZodObject<{
|
|
15310
|
+
ratio: z.ZodOptional<z.ZodNumber>;
|
|
15311
|
+
}, z.core.$strip>>;
|
|
15282
15312
|
}, z.core.$strict>>;
|
|
15283
15313
|
}, z.core.$strip>>;
|
|
15284
15314
|
filename: z.ZodString;
|
package/lib/types/schema.js
CHANGED
|
@@ -409,7 +409,7 @@ export const mulmoSlideParamsSchema = z
|
|
|
409
409
|
export const beatAudioParamsSchema = z
|
|
410
410
|
.object({
|
|
411
411
|
padding: z.number().optional().describe("Padding between beats"), // seconds
|
|
412
|
-
movieVolume: z.number().
|
|
412
|
+
movieVolume: z.number().min(0).max(1).optional().describe("Audio volume of the imported or generated movie"),
|
|
413
413
|
})
|
|
414
414
|
.strict();
|
|
415
415
|
export const mulmoHtmlImageParamsSchema = z
|
|
@@ -428,6 +428,14 @@ export const audioParamsSchema = z
|
|
|
428
428
|
bgmVolume: z.number().optional().default(0.2).describe("Volume of the background music"),
|
|
429
429
|
audioVolume: z.number().optional().default(1.0).describe("Volume of the audio"),
|
|
430
430
|
suppressSpeech: z.boolean().optional().default(false).describe("Suppress speech generation"),
|
|
431
|
+
movieVolume: z.number().min(0).max(1).optional().describe("Default movie audio volume for all beats"),
|
|
432
|
+
ttsVolume: z.number().min(0).max(2).optional().describe("TTS narration volume before mixing with BGM/movie audio"),
|
|
433
|
+
ducking: z
|
|
434
|
+
.object({
|
|
435
|
+
ratio: z.number().min(0).max(1).optional().describe("Movie volume ratio during TTS beats (default 0.3)"),
|
|
436
|
+
})
|
|
437
|
+
.optional()
|
|
438
|
+
.describe("Auto-reduce movie audio when TTS is playing"),
|
|
431
439
|
})
|
|
432
440
|
.strict();
|
|
433
441
|
export const htmlPromptParamsSchema = z
|