mulmocast 2.6.6 → 2.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,11 @@
1
1
  import dotenv from "dotenv";
2
- import { GraphAI, TaskManager, GraphAILogger } from "graphai";
2
+ import { GraphAI, GraphAILogger } from "graphai";
3
3
  import * as agents from "@graphai/vanilla";
4
4
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
5
5
  import { ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, ttsKotodamaAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
6
- import { text2SpeechProviderSchema } from "../types/index.js";
7
- import { fileCacheAgentFilter } from "../utils/filters.js";
6
+ import { audioGraphOption } from "./graph_option.js";
8
7
  import { getAudioArtifactFilePath, getAudioFilePath, getGroupedAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage, } from "../utils/file.js";
9
- import { localizedText, settings2GraphAIConfig } from "../utils/utils.js";
8
+ import { localizedText } from "../utils/utils.js";
10
9
  import { text2hash } from "../utils/utils_node.js";
11
10
  import { provider2TTSAgent } from "../types/provider2agent.js";
12
11
  import { invalidAudioSourceError } from "../utils/error_cause.js";
@@ -210,21 +209,6 @@ export const audio_graph_data = {
210
209
  },
211
210
  },
212
211
  };
213
- const agentFilters = [
214
- {
215
- name: "fileCacheAgentFilter",
216
- agent: fileCacheAgentFilter,
217
- nodeIds: ["tts"],
218
- },
219
- ];
220
- const getConcurrency = (context) => {
221
- // Check if any speaker uses elevenlabs or kotodama (providers that require concurrency = 1)
222
- const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
223
- const provider = text2SpeechProviderSchema.parse(speaker.provider);
224
- return provider2TTSAgent[provider].hasLimitedConcurrency;
225
- });
226
- return hasLimitedConcurrencyProvider ? 1 : 8;
227
- };
228
212
  const audioAgents = {
229
213
  ...vanillaAgents,
230
214
  fileWriteAgent,
@@ -246,9 +230,8 @@ export const generateBeatAudio = async (index, context, args) => {
246
230
  const audioSegmentDirPath = context.fileDirs.grouped ? audioDirPath : resolveDirPath(audioDirPath, fileName);
247
231
  mkdir(outDirPath);
248
232
  mkdir(audioSegmentDirPath);
249
- const config = settings2GraphAIConfig(settings);
250
- const taskManager = new TaskManager(getConcurrency(context));
251
- const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, { agentFilters, taskManager, config });
233
+ const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, await audioGraphOption(context, settings));
234
+ callbacks?.forEach((callback) => graph.registerCallback(callback));
252
235
  graph.injectValue("__mapIndex", index);
253
236
  graph.injectValue("beat", context.studio.script.beats[index]);
254
237
  graph.injectValue("studioBeat", context.studio.beats[index]);
@@ -260,11 +243,6 @@ export const generateBeatAudio = async (index, context, args) => {
260
243
  else {
261
244
  graph.injectValue("lang", context.lang);
262
245
  }
263
- if (callbacks) {
264
- callbacks.forEach((callback) => {
265
- graph.registerCallback(callback);
266
- });
267
- }
268
246
  await graph.run();
269
247
  }
270
248
  catch (error) {
@@ -288,19 +266,13 @@ export const audio = async (context, args) => {
288
266
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
289
267
  mkdir(outDirPath);
290
268
  mkdir(audioSegmentDirPath);
291
- const config = settings2GraphAIConfig(settings, process.env);
292
- const taskManager = new TaskManager(getConcurrency(context));
293
- const graph = new GraphAI(audio_graph_data, audioAgents, { agentFilters, taskManager, config });
269
+ const graph = new GraphAI(audio_graph_data, audioAgents, await audioGraphOption(context, settings));
270
+ callbacks?.forEach((callback) => graph.registerCallback(callback));
294
271
  graph.injectValue("context", context);
295
272
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
296
273
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
297
274
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
298
275
  graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
299
- if (callbacks) {
300
- callbacks.forEach((callback) => {
301
- graph.registerCallback(callback);
302
- });
303
- }
304
276
  const result = await graph.run();
305
277
  writingMessage(audioCombinedFilePath);
306
278
  MulmoStudioContextMethods.setSessionState(context, "audio", false, true);
@@ -1,3 +1,4 @@
1
1
  import type { GraphOptions } from "graphai";
2
2
  import { MulmoStudioContext } from "../types/index.js";
3
- export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
3
+ export declare const imageGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
4
+ export declare const audioGraphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
@@ -2,17 +2,18 @@ import { TaskManager } from "graphai";
2
2
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
3
3
  import { fileCacheAgentFilter } from "../utils/filters.js";
4
4
  import { settings2GraphAIConfig } from "../utils/utils.js";
5
- export const graphOption = async (context, settings) => {
6
- const options = {
7
- agentFilters: [
8
- {
9
- name: "fileCacheAgentFilter",
10
- agent: fileCacheAgentFilter,
11
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"],
12
- },
13
- ],
14
- taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
15
- config: settings2GraphAIConfig(settings, process.env),
16
- };
17
- return options;
18
- };
5
+ const createGraphOption = (concurrency, cacheNodeIds, settings) => ({
6
+ agentFilters: [
7
+ {
8
+ name: "fileCacheAgentFilter",
9
+ agent: fileCacheAgentFilter,
10
+ nodeIds: cacheNodeIds,
11
+ },
12
+ ],
13
+ taskManager: new TaskManager(concurrency),
14
+ config: settings2GraphAIConfig(settings, process.env),
15
+ });
16
+ const IMAGE_CACHE_NODE_IDS = ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator", "AudioTrimmer"];
17
+ const AUDIO_CACHE_NODE_IDS = ["tts"];
18
+ export const imageGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getImageConcurrency(context.presentationStyle), IMAGE_CACHE_NODE_IDS, settings);
19
+ export const audioGraphOption = async (context, settings) => createGraphOption(MulmoPresentationStyleMethods.getAudioConcurrency(context.presentationStyle), AUDIO_CACHE_NODE_IDS, settings);
@@ -1,6 +1,6 @@
1
1
  import { GraphAI, GraphAILogger } from "graphai";
2
2
  import { getReferenceImagePath } from "../utils/file.js";
3
- import { graphOption } from "./graph_option.js";
3
+ import { imageGraphOption } from "./graph_option.js";
4
4
  import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
5
5
  import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
6
6
  import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
@@ -41,7 +41,7 @@ export const generateReferenceImage = async (inputs) => {
41
41
  },
42
42
  };
43
43
  try {
44
- const options = await graphOption(context);
44
+ const options = await imageGraphOption(context);
45
45
  const graph = new GraphAI(image_graph_data, { imageGenAIAgent, imageOpenaiAgent, mediaMockAgent, imageReplicateAgent }, options);
46
46
  await graph.run();
47
47
  return imagePath;
@@ -121,12 +121,13 @@ const generateReferenceMovie = async (inputs) => {
121
121
  params: {
122
122
  model: movieAgentInfo.movieParams.model,
123
123
  canvasSize: context.presentationStyle.canvasSize,
124
+ generateAudio: movieAgentInfo.movieParams.generateAudio,
124
125
  },
125
126
  },
126
127
  },
127
128
  };
128
129
  try {
129
- const options = await graphOption(context);
130
+ const options = await imageGraphOption(context);
130
131
  const graph = new GraphAI(movie_graph_data, { movieGenAIAgent, movieReplicateAgent, mediaMockAgent }, options);
131
132
  await graph.run();
132
133
  return moviePath;
@@ -324,6 +324,7 @@ export declare const beat_graph_data: {
324
324
  canvasSize: string;
325
325
  vertexai_project: string;
326
326
  vertexai_location: string;
327
+ generateAudio: string;
327
328
  };
328
329
  };
329
330
  defaultValue: {};
@@ -463,7 +464,7 @@ export declare const beat_graph_data: {
463
464
  };
464
465
  };
465
466
  export declare const images_graph_data: GraphData;
466
- export { graphOption } from "./graph_option.js";
467
+ export { imageGraphOption } from "./graph_option.js";
467
468
  type ImageOptions = {
468
469
  imageAgents: Record<string, unknown>;
469
470
  };
@@ -14,7 +14,7 @@ import { audioCheckerError } from "../utils/error_cause.js";
14
14
  import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
15
15
  import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
16
16
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
- import { graphOption } from "./graph_option.js";
17
+ import { imageGraphOption } from "./graph_option.js";
18
18
  const vanillaAgents = vanilla.default ?? vanilla;
19
19
  const imageAgents = {
20
20
  imageGenAIAgent,
@@ -194,6 +194,7 @@ export const beat_graph_data = {
194
194
  canvasSize: ":context.presentationStyle.canvasSize",
195
195
  vertexai_project: ":preprocessor.movieAgentInfo.movieParams.vertexai_project",
196
196
  vertexai_location: ":preprocessor.movieAgentInfo.movieParams.vertexai_location",
197
+ generateAudio: ":preprocessor.movieAgentInfo.movieParams.generateAudio",
197
198
  },
198
199
  },
199
200
  defaultValue: {},
@@ -431,8 +432,7 @@ export const images_graph_data = {
431
432
  },
432
433
  },
433
434
  };
434
- // graphOption moved to graph_option.ts to break circular dependency with image_references.ts
435
- export { graphOption } from "./graph_option.js";
435
+ export { imageGraphOption } from "./graph_option.js";
436
436
  const prepareGenerateImages = async (context) => {
437
437
  const fileName = MulmoStudioContextMethods.getFileName(context);
438
438
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -459,7 +459,7 @@ const generateImages = async (context, args) => {
459
459
  ...defaultAgents,
460
460
  ...optionImageAgents,
461
461
  };
462
- const graph = new GraphAI(images_graph_data, graphaiAgent, await graphOption(context, settings));
462
+ const graph = new GraphAI(images_graph_data, graphaiAgent, await imageGraphOption(context, settings));
463
463
  Object.keys(injections).forEach((key) => {
464
464
  graph.injectValue(key, injections[key]);
465
465
  });
@@ -499,7 +499,7 @@ export const generateBeatImage = async (inputs) => {
499
499
  try {
500
500
  const { index, context, args } = inputs;
501
501
  const { settings, callbacks, forceMovie, forceImage, forceLipSync, forceSoundEffect, withBackup } = args ?? {};
502
- const options = await graphOption(context, settings);
502
+ const options = await imageGraphOption(context, settings);
503
503
  const injections = await prepareGenerateImages(context);
504
504
  const graph = new GraphAI(beat_graph_data, defaultAgents, options);
505
505
  Object.keys(injections).forEach((key) => {
@@ -1,10 +1,10 @@
1
1
  import { readFileSync, writeFileSync } from "fs";
2
2
  import { GraphAILogger, sleep } from "graphai";
3
3
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
4
- import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, hasCause, } from "../utils/error_cause.js";
4
+ import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, hasCause, } from "../utils/error_cause.js";
5
5
  import { getAspectRatio } from "../utils/utils.js";
6
6
  import { ASPECT_RATIOS } from "../types/const.js";
7
- import { getModelDuration, provider2MovieAgent } from "../types/provider2agent.js";
7
+ import { getModelDuration, provider2MovieAgent, AUDIO_MODE_NEVER, AUDIO_MODE_ALWAYS } from "../types/provider2agent.js";
8
8
  const pollUntilDone = async (ai, operation) => {
9
9
  const response = { operation };
10
10
  while (!response.operation.done) {
@@ -154,6 +154,18 @@ export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
154
154
  cause: agentGenerationError("movieGenAIAgent", imageAction, videoDurationTarget),
155
155
  });
156
156
  }
157
+ // Check generateAudio compatibility (Google API has no toggle)
158
+ if (params.generateAudio !== undefined) {
159
+ const audio = provider2MovieAgent.google.modelParams[model]?.audio ?? { mode: AUDIO_MODE_NEVER };
160
+ if (audio.mode === AUDIO_MODE_NEVER && params.generateAudio === true) {
161
+ throw new Error(`Model ${model} does not support audio generation`, {
162
+ cause: agentGenerationError("movieGenAIAgent", imageAction, unsupportedModelTarget),
163
+ });
164
+ }
165
+ else if (audio.mode === AUDIO_MODE_ALWAYS && params.generateAudio === false) {
166
+ GraphAILogger.warn(`movieGenAIAgent: model ${model} always generates audio — ignoring generateAudio=false`);
167
+ }
168
+ }
157
169
  const isVertexAI = !!params.vertexai_project;
158
170
  const ai = isVertexAI
159
171
  ? new GoogleGenAI({
@@ -1,8 +1,8 @@
1
1
  import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
- import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
- import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
4
+ import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, hasCause, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
+ import { provider2MovieAgent, getModelDuration, AUDIO_MODE_OPTIONAL, AUDIO_MODE_NEVER, AUDIO_MODE_ALWAYS } from "../types/provider2agent.js";
6
6
  function replicate_get_videoUrl(output) {
7
7
  if (typeof output === "string")
8
8
  return output;
@@ -10,7 +10,7 @@ function replicate_get_videoUrl(output) {
10
10
  return output.url();
11
11
  return undefined;
12
12
  }
13
- async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
13
+ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration, generateAudio) {
14
14
  const replicate = new Replicate({
15
15
  auth: apiKey,
16
16
  });
@@ -77,6 +77,21 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
77
77
  GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support lastFrame — ignoring lastFrameImageName`);
78
78
  }
79
79
  }
80
+ // Add generate_audio if the model supports it
81
+ const audio = provider2MovieAgent.replicate.modelParams[model].audio;
82
+ if (generateAudio !== undefined) {
83
+ if (audio.mode === AUDIO_MODE_OPTIONAL) {
84
+ input[audio.param] = generateAudio;
85
+ }
86
+ else if (audio.mode === AUDIO_MODE_NEVER && generateAudio === true) {
87
+ throw new Error(`Model ${model} does not support audio generation`, {
88
+ cause: agentGenerationError("movieReplicateAgent", imageAction, unsupportedModelTarget),
89
+ });
90
+ }
91
+ else if (audio.mode === AUDIO_MODE_ALWAYS && generateAudio === false) {
92
+ GraphAILogger.warn(`movieReplicateAgent: model ${model} always generates audio — ignoring generateAudio=false`);
93
+ }
94
+ }
80
95
  try {
81
96
  const output = await replicate.run(model, { input });
82
97
  // Download the generated video
@@ -134,12 +149,15 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
134
149
  });
135
150
  }
136
151
  try {
137
- const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
152
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration, params.generateAudio);
138
153
  if (buffer) {
139
154
  return { buffer };
140
155
  }
141
156
  }
142
157
  catch (error) {
158
+ if (hasCause(error)) {
159
+ throw error;
160
+ }
143
161
  GraphAILogger.info("Failed to generate movie:", error.message);
144
162
  }
145
163
  throw new Error("ERROR: generateMovie returned undefined", {
@@ -179,6 +179,8 @@ export declare const MulmoPresentationStyleMethods: {
179
179
  imageName: string;
180
180
  referenceType: "ASSET" | "STYLE";
181
181
  }[] | undefined;
182
+ concurrency?: number | undefined;
183
+ generateAudio?: boolean | undefined;
182
184
  speed?: number | undefined;
183
185
  };
184
186
  keyName: string;
@@ -204,7 +206,10 @@ export declare const MulmoPresentationStyleMethods: {
204
206
  image?: string;
205
207
  }>;
206
208
  };
207
- getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
209
+ /** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
210
+ getImageConcurrency(presentationStyle: MulmoPresentationStyle): number;
211
+ /** Concurrency for audio/TTS generation graph */
212
+ getAudioConcurrency(presentationStyle: MulmoPresentationStyle): number;
208
213
  getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
209
214
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
210
215
  };
@@ -6,10 +6,10 @@
6
6
  import { isNull } from "graphai";
7
7
  import { userAssert } from "../utils/utils.js";
8
8
  import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
9
- import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
9
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2TTSAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
10
10
  const defaultTextSlideStyles = [
11
11
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
12
- "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
12
+ "body { margin: 60px; margin-top: 40px; color:#333; background-color:#fff; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
13
13
  "h1 { font-size: 56px; margin-bottom: 20px; text-align: center }",
14
14
  "h2 { font-size: 48px; text-align: center }",
15
15
  "h3 { font-size: 36px }",
@@ -119,7 +119,17 @@ export const MulmoPresentationStyleMethods = {
119
119
  const agentInfo = provider2LipSyncAgent[lipSyncProvider];
120
120
  return agentInfo;
121
121
  },
122
- getConcurrency(presentationStyle) {
122
+ /** Concurrency for image/movie generation graph (uses min of imageParams/movieParams) */
123
+ getImageConcurrency(presentationStyle) {
124
+ const imageConcurrency = presentationStyle.imageParams?.concurrency;
125
+ const movieConcurrency = presentationStyle.movieParams?.concurrency;
126
+ // User-specified concurrency takes precedence.
127
+ // Use the smaller of imageParams/movieParams since they share the same graph.
128
+ if (imageConcurrency !== undefined || movieConcurrency !== undefined) {
129
+ const values = [imageConcurrency, movieConcurrency].filter((v) => v !== undefined);
130
+ return Math.min(...values);
131
+ }
132
+ // Fallback: provider-based auto-detection
123
133
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
124
134
  if (imageAgentInfo.imageParams.provider === "openai") {
125
135
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -131,6 +141,20 @@ export const MulmoPresentationStyleMethods = {
131
141
  }
132
142
  return 4;
133
143
  },
144
+ /** Concurrency for audio/TTS generation graph */
145
+ getAudioConcurrency(presentationStyle) {
146
+ // User-specified concurrency takes precedence
147
+ const userConcurrency = presentationStyle.audioParams?.concurrency;
148
+ if (userConcurrency !== undefined) {
149
+ return userConcurrency;
150
+ }
151
+ // Fallback: provider-based auto-detection
152
+ const hasLimitedConcurrencyProvider = Object.values(presentationStyle.speechParams.speakers).some((speaker) => {
153
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
154
+ return provider2TTSAgent[provider].hasLimitedConcurrency;
155
+ });
156
+ return hasLimitedConcurrencyProvider ? 1 : 8;
157
+ },
134
158
  getHtmlImageAgentInfo(presentationStyle) {
135
159
  const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
136
160
  const defaultConfig = provider2LLMAgent[provider];
@@ -84,6 +84,7 @@ export type GoogleMovieAgentParams = ImageAgentParams & {
84
84
  duration?: number;
85
85
  vertexai_project?: string;
86
86
  vertexai_location?: string;
87
+ generateAudio?: boolean;
87
88
  };
88
89
  export type ReplicateMovieAgentParams = {
89
90
  model: `${string}/${string}` | undefined;
@@ -92,6 +93,7 @@ export type ReplicateMovieAgentParams = {
92
93
  height: number;
93
94
  };
94
95
  duration?: number;
96
+ generateAudio?: boolean;
95
97
  };
96
98
  export type ReplicateSoundEffectAgentParams = {
97
99
  model: `${string}/${string}` | undefined;
@@ -70,32 +70,47 @@ export declare const provider2ImageAgent: {
70
70
  };
71
71
  };
72
72
  export type ReplicateModel = `${string}/${string}`;
73
+ export declare const AUDIO_MODE_NEVER: "never";
74
+ export declare const AUDIO_MODE_ALWAYS: "always";
75
+ export declare const AUDIO_MODE_OPTIONAL: "optional";
76
+ type MovieAudioSpec = {
77
+ mode: typeof AUDIO_MODE_NEVER;
78
+ } | {
79
+ mode: typeof AUDIO_MODE_ALWAYS;
80
+ } | {
81
+ mode: typeof AUDIO_MODE_OPTIONAL;
82
+ param: string;
83
+ };
84
+ type ReplicateMovieModelParams = {
85
+ durations: number[];
86
+ start_image: string | undefined;
87
+ last_image?: string;
88
+ reference_images_param?: string;
89
+ audio: MovieAudioSpec;
90
+ price_per_sec: number;
91
+ };
92
+ type GoogleMovieModelParams = {
93
+ durations: number[];
94
+ supportsDuration: boolean;
95
+ supportsLastFrame: boolean;
96
+ supportsReferenceImages: boolean;
97
+ supportsPersonGeneration: boolean;
98
+ audio: MovieAudioSpec;
99
+ };
73
100
  export declare const provider2MovieAgent: {
74
101
  replicate: {
75
102
  agentName: string;
76
103
  defaultModel: ReplicateModel;
77
104
  keyName: string;
78
105
  models: string[];
79
- modelParams: Record<ReplicateModel, {
80
- durations: number[];
81
- start_image: string | undefined;
82
- last_image?: string;
83
- reference_images_param?: string;
84
- price_per_sec: number;
85
- }>;
106
+ modelParams: Record<ReplicateModel, ReplicateMovieModelParams>;
86
107
  };
87
108
  google: {
88
109
  agentName: string;
89
110
  defaultModel: string;
90
111
  models: string[];
91
112
  keyName: string;
92
- modelParams: Record<string, {
93
- durations: number[];
94
- supportsDuration: boolean;
95
- supportsLastFrame: boolean;
96
- supportsReferenceImages: boolean;
97
- supportsPersonGeneration: boolean;
98
- }>;
113
+ modelParams: Record<string, GoogleMovieModelParams>;
99
114
  };
100
115
  mock: {
101
116
  agentName: string;
@@ -182,3 +197,4 @@ export declare const llm: (keyof typeof provider2LLMAgent)[];
182
197
  export type LLM = keyof typeof provider2LLMAgent;
183
198
  export declare const htmlLLMProvider: string[];
184
199
  export declare const getModelDuration: (provider: keyof typeof provider2MovieAgent, model: string, movieDuration?: number) => number | undefined;
200
+ export {};
@@ -79,6 +79,9 @@ export const provider2ImageAgent = {
79
79
  keyName: "",
80
80
  },
81
81
  };
82
+ export const AUDIO_MODE_NEVER = "never";
83
+ export const AUDIO_MODE_ALWAYS = "always";
84
+ export const AUDIO_MODE_OPTIONAL = "optional";
82
85
  export const provider2MovieAgent = {
83
86
  replicate: {
84
87
  agentName: "movieReplicateAgent",
@@ -87,6 +90,8 @@ export const provider2MovieAgent = {
87
90
  models: [
88
91
  "bytedance/seedance-1-lite",
89
92
  "bytedance/seedance-1-pro",
93
+ "bytedance/seedance-2.0",
94
+ "bytedance/seedance-2.0-fast",
90
95
  "kwaivgi/kling-v1.6-pro",
91
96
  "kwaivgi/kling-v2.1",
92
97
  "kwaivgi/kling-v2.1-master",
@@ -113,37 +118,58 @@ export const provider2MovieAgent = {
113
118
  durations: [5, 10],
114
119
  start_image: "image",
115
120
  last_image: "last_frame_image",
121
+ audio: { mode: AUDIO_MODE_NEVER },
116
122
  price_per_sec: 0.036, // in USD
117
123
  },
118
124
  "bytedance/seedance-1-pro": {
119
125
  durations: [5, 10],
120
126
  start_image: "image",
121
127
  last_image: "last_frame_image",
128
+ audio: { mode: AUDIO_MODE_NEVER },
122
129
  price_per_sec: 0.15,
123
130
  },
131
+ "bytedance/seedance-2.0": {
132
+ durations: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
133
+ start_image: "image",
134
+ last_image: "last_frame_image",
135
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
136
+ price_per_sec: 0.29,
137
+ },
138
+ "bytedance/seedance-2.0-fast": {
139
+ durations: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
140
+ start_image: "image",
141
+ last_image: "last_frame_image",
142
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
143
+ price_per_sec: 0.22,
144
+ },
124
145
  "kwaivgi/kling-v1.6-pro": {
125
146
  durations: [5, 10],
126
147
  start_image: "start_image",
148
+ audio: { mode: AUDIO_MODE_NEVER },
127
149
  price_per_sec: 0.095,
128
150
  },
129
151
  "kwaivgi/kling-v2.1": {
130
152
  durations: [5, 10],
131
153
  start_image: "start_image",
154
+ audio: { mode: AUDIO_MODE_NEVER },
132
155
  price_per_sec: 0.05,
133
156
  },
134
157
  "kwaivgi/kling-v2.1-master": {
135
158
  durations: [5, 10],
136
159
  start_image: "start_image",
160
+ audio: { mode: AUDIO_MODE_NEVER },
137
161
  price_per_sec: 0.28,
138
162
  },
139
163
  "google/veo-2": {
140
164
  durations: [5, 6, 7, 8],
141
165
  start_image: "image",
166
+ audio: { mode: AUDIO_MODE_NEVER },
142
167
  price_per_sec: 0.5,
143
168
  },
144
169
  "google/veo-3": {
145
170
  durations: [8],
146
171
  start_image: "image",
172
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
147
173
  price_per_sec: 0.75,
148
174
  },
149
175
  "google/veo-3.1": {
@@ -151,71 +177,84 @@ export const provider2MovieAgent = {
151
177
  start_image: "image",
152
178
  last_image: "last_frame_image",
153
179
  reference_images_param: "reference_images",
180
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
154
181
  price_per_sec: 0.75,
155
182
  },
156
183
  "google/veo-3.1-fast": {
157
184
  durations: [4, 6, 8],
158
185
  start_image: "image",
159
186
  last_image: "last_frame_image",
187
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
160
188
  price_per_sec: 0.4,
161
189
  },
162
190
  "google/veo-3.1-lite": {
163
191
  durations: [4, 6, 8],
164
192
  start_image: "image",
165
193
  last_image: "last_frame",
194
+ audio: { mode: AUDIO_MODE_NEVER },
166
195
  price_per_sec: 0.05,
167
196
  },
168
197
  "google/veo-3-fast": {
169
198
  durations: [8],
170
199
  start_image: "image",
200
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
171
201
  price_per_sec: 0.4,
172
202
  },
173
203
  "minimax/video-01": {
174
204
  durations: [6],
175
205
  start_image: "first_frame_image",
206
+ audio: { mode: AUDIO_MODE_NEVER },
176
207
  price_per_sec: 0.5,
177
208
  },
178
209
  "minimax/hailuo-02": {
179
210
  durations: [6], // NOTE: 10 for only 720p
180
211
  start_image: "first_frame_image",
181
212
  last_image: "end_image",
213
+ audio: { mode: AUDIO_MODE_NEVER },
182
214
  price_per_sec: 0.08,
183
215
  },
184
216
  "minimax/hailuo-02-fast": {
185
217
  durations: [6, 10], // NOTE: 512P
186
218
  start_image: "first_frame_image",
219
+ audio: { mode: AUDIO_MODE_NEVER },
187
220
  price_per_sec: 0.0166,
188
221
  },
189
222
  "pixverse/pixverse-v4.5": {
190
223
  durations: [5, 8],
191
224
  start_image: "image",
192
225
  last_image: "last_frame_image",
226
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "sound_effect_switch" },
193
227
  price_per_sec: 0.12,
194
228
  },
195
229
  "wan-video/wan-2.2-i2v-fast": {
196
230
  durations: [5],
197
231
  start_image: "image",
232
+ audio: { mode: AUDIO_MODE_NEVER },
198
233
  price_per_sec: 0.012,
199
234
  },
200
235
  "wan-video/wan-2.2-t2v-fast": {
201
236
  durations: [5],
202
237
  start_image: undefined,
238
+ audio: { mode: AUDIO_MODE_NEVER },
203
239
  price_per_sec: 0.012,
204
240
  },
205
241
  "xai/grok-imagine-video": {
206
242
  durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
207
243
  start_image: "image",
244
+ audio: { mode: AUDIO_MODE_NEVER },
208
245
  price_per_sec: 0.08,
209
246
  },
210
247
  "xai/grok-imagine-r2v": {
211
248
  durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
212
249
  start_image: undefined,
213
250
  reference_images_param: "reference_images",
251
+ audio: { mode: AUDIO_MODE_NEVER },
214
252
  price_per_sec: 0.08,
215
253
  },
216
254
  "runwayml/gen-4.5": {
217
255
  durations: [5, 10],
218
256
  start_image: "image",
257
+ audio: { mode: AUDIO_MODE_NEVER },
219
258
  price_per_sec: 0.25,
220
259
  },
221
260
  "kwaivgi/kling-v3-omni-video": {
@@ -223,6 +262,7 @@ export const provider2MovieAgent = {
223
262
  start_image: "start_image",
224
263
  last_image: "end_image",
225
264
  reference_images_param: "reference_images",
265
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
226
266
  price_per_sec: 0.3,
227
267
  },
228
268
  "kwaivgi/kling-v3-video": {
@@ -230,6 +270,7 @@ export const provider2MovieAgent = {
230
270
  start_image: "start_image",
231
271
  last_image: "end_image",
232
272
  reference_images_param: "reference_images",
273
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
233
274
  price_per_sec: 0.3,
234
275
  },
235
276
  },
@@ -246,6 +287,7 @@ export const provider2MovieAgent = {
246
287
  supportsLastFrame: true,
247
288
  supportsReferenceImages: false,
248
289
  supportsPersonGeneration: false,
290
+ audio: { mode: AUDIO_MODE_ALWAYS },
249
291
  },
250
292
  "veo-3.1-generate-preview": {
251
293
  durations: [4, 6, 8],
@@ -253,6 +295,7 @@ export const provider2MovieAgent = {
253
295
  supportsLastFrame: true,
254
296
  supportsReferenceImages: true,
255
297
  supportsPersonGeneration: false,
298
+ audio: { mode: AUDIO_MODE_ALWAYS },
256
299
  },
257
300
  "veo-3.0-generate-001": {
258
301
  durations: [8],
@@ -260,6 +303,7 @@ export const provider2MovieAgent = {
260
303
  supportsLastFrame: false,
261
304
  supportsReferenceImages: false,
262
305
  supportsPersonGeneration: false,
306
+ audio: { mode: AUDIO_MODE_ALWAYS },
263
307
  },
264
308
  "veo-2.0-generate-001": {
265
309
  durations: [5, 6, 8],
@@ -267,6 +311,7 @@ export const provider2MovieAgent = {
267
311
  supportsLastFrame: false, // Vertex AI only
268
312
  supportsReferenceImages: false,
269
313
  supportsPersonGeneration: true,
314
+ audio: { mode: AUDIO_MODE_NEVER },
270
315
  },
271
316
  },
272
317
  },
@@ -3299,6 +3299,7 @@ export declare const mulmoImageParamsSchema: z.ZodObject<{
3299
3299
  }>>;
3300
3300
  opacity: z.ZodOptional<z.ZodNumber>;
3301
3301
  }, z.core.$strip>]>>>;
3302
+ concurrency: z.ZodOptional<z.ZodNumber>;
3302
3303
  }, z.core.$strict>;
3303
3304
  export declare const textSlideParamsSchema: z.ZodObject<{
3304
3305
  cssStyles: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
@@ -3398,6 +3399,7 @@ export declare const audioParamsSchema: z.ZodObject<{
3398
3399
  ducking: z.ZodOptional<z.ZodObject<{
3399
3400
  ratio: z.ZodOptional<z.ZodNumber>;
3400
3401
  }, z.core.$strip>>;
3402
+ concurrency: z.ZodOptional<z.ZodNumber>;
3401
3403
  }, z.core.$strict>;
3402
3404
  export declare const htmlPromptParamsSchema: z.ZodObject<{
3403
3405
  systemPrompt: z.ZodDefault<z.ZodOptional<z.ZodString>>;
@@ -3688,6 +3690,8 @@ export declare const mulmoMovieParamsSchema: z.ZodObject<{
3688
3690
  STYLE: "STYLE";
3689
3691
  }>;
3690
3692
  }, z.core.$strip>>>;
3693
+ concurrency: z.ZodOptional<z.ZodNumber>;
3694
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
3691
3695
  }, z.core.$strip>;
3692
3696
  export declare const mulmoBeatSchema: z.ZodObject<{
3693
3697
  speaker: z.ZodOptional<z.ZodString>;
@@ -6581,6 +6585,8 @@ export declare const mulmoBeatSchema: z.ZodObject<{
6581
6585
  STYLE: "STYLE";
6582
6586
  }>;
6583
6587
  }, z.core.$strip>>>;
6588
+ concurrency: z.ZodOptional<z.ZodNumber>;
6589
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
6584
6590
  speed: z.ZodOptional<z.ZodNumber>;
6585
6591
  }, z.core.$strip>>;
6586
6592
  soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -6829,6 +6835,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
6829
6835
  }>>;
6830
6836
  opacity: z.ZodOptional<z.ZodNumber>;
6831
6837
  }, z.core.$strip>]>>>;
6838
+ concurrency: z.ZodOptional<z.ZodNumber>;
6832
6839
  }, z.core.$strict>>>;
6833
6840
  movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
6834
6841
  provider: z.ZodOptional<z.ZodEnum<{
@@ -7068,6 +7075,8 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
7068
7075
  STYLE: "STYLE";
7069
7076
  }>;
7070
7077
  }, z.core.$strip>>>;
7078
+ concurrency: z.ZodOptional<z.ZodNumber>;
7079
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
7071
7080
  }, z.core.$strip>>>;
7072
7081
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
7073
7082
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -7191,6 +7200,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
7191
7200
  ducking: z.ZodOptional<z.ZodObject<{
7192
7201
  ratio: z.ZodOptional<z.ZodNumber>;
7193
7202
  }, z.core.$strip>>;
7203
+ concurrency: z.ZodOptional<z.ZodNumber>;
7194
7204
  }, z.core.$strict>>;
7195
7205
  }, z.core.$strip>;
7196
7206
  export declare const mulmoReferenceSchema: z.ZodObject<{
@@ -7330,6 +7340,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
7330
7340
  }>>;
7331
7341
  opacity: z.ZodOptional<z.ZodNumber>;
7332
7342
  }, z.core.$strip>]>>>;
7343
+ concurrency: z.ZodOptional<z.ZodNumber>;
7333
7344
  }, z.core.$strict>>>;
7334
7345
  movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
7335
7346
  provider: z.ZodOptional<z.ZodEnum<{
@@ -7569,6 +7580,8 @@ export declare const mulmoScriptSchema: z.ZodObject<{
7569
7580
  STYLE: "STYLE";
7570
7581
  }>;
7571
7582
  }, z.core.$strip>>>;
7583
+ concurrency: z.ZodOptional<z.ZodNumber>;
7584
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
7572
7585
  }, z.core.$strip>>>;
7573
7586
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
7574
7587
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -7692,6 +7705,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
7692
7705
  ducking: z.ZodOptional<z.ZodObject<{
7693
7706
  ratio: z.ZodOptional<z.ZodNumber>;
7694
7707
  }, z.core.$strip>>;
7708
+ concurrency: z.ZodOptional<z.ZodNumber>;
7695
7709
  }, z.core.$strict>>;
7696
7710
  title: z.ZodOptional<z.ZodString>;
7697
7711
  description: z.ZodOptional<z.ZodString>;
@@ -10600,6 +10614,8 @@ export declare const mulmoScriptSchema: z.ZodObject<{
10600
10614
  STYLE: "STYLE";
10601
10615
  }>;
10602
10616
  }, z.core.$strip>>>;
10617
+ concurrency: z.ZodOptional<z.ZodNumber>;
10618
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
10603
10619
  speed: z.ZodOptional<z.ZodNumber>;
10604
10620
  }, z.core.$strip>>;
10605
10621
  soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -10923,6 +10939,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
10923
10939
  }>>;
10924
10940
  opacity: z.ZodOptional<z.ZodNumber>;
10925
10941
  }, z.core.$strip>]>>>;
10942
+ concurrency: z.ZodOptional<z.ZodNumber>;
10926
10943
  }, z.core.$strict>>>;
10927
10944
  movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
10928
10945
  provider: z.ZodOptional<z.ZodEnum<{
@@ -11162,6 +11179,8 @@ export declare const mulmoStudioSchema: z.ZodObject<{
11162
11179
  STYLE: "STYLE";
11163
11180
  }>;
11164
11181
  }, z.core.$strip>>>;
11182
+ concurrency: z.ZodOptional<z.ZodNumber>;
11183
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
11165
11184
  }, z.core.$strip>>>;
11166
11185
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
11167
11186
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -11285,6 +11304,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
11285
11304
  ducking: z.ZodOptional<z.ZodObject<{
11286
11305
  ratio: z.ZodOptional<z.ZodNumber>;
11287
11306
  }, z.core.$strip>>;
11307
+ concurrency: z.ZodOptional<z.ZodNumber>;
11288
11308
  }, z.core.$strict>>;
11289
11309
  title: z.ZodOptional<z.ZodString>;
11290
11310
  description: z.ZodOptional<z.ZodString>;
@@ -14193,6 +14213,8 @@ export declare const mulmoStudioSchema: z.ZodObject<{
14193
14213
  STYLE: "STYLE";
14194
14214
  }>;
14195
14215
  }, z.core.$strip>>>;
14216
+ concurrency: z.ZodOptional<z.ZodNumber>;
14217
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
14196
14218
  speed: z.ZodOptional<z.ZodNumber>;
14197
14219
  }, z.core.$strip>>;
14198
14220
  soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -14452,6 +14474,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
14452
14474
  }>>;
14453
14475
  opacity: z.ZodOptional<z.ZodNumber>;
14454
14476
  }, z.core.$strip>]>>>;
14477
+ concurrency: z.ZodOptional<z.ZodNumber>;
14455
14478
  }, z.core.$strict>>>;
14456
14479
  movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
14457
14480
  provider: z.ZodOptional<z.ZodEnum<{
@@ -14691,6 +14714,8 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
14691
14714
  STYLE: "STYLE";
14692
14715
  }>;
14693
14716
  }, z.core.$strip>>>;
14717
+ concurrency: z.ZodOptional<z.ZodNumber>;
14718
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
14694
14719
  }, z.core.$strip>>>;
14695
14720
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
14696
14721
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -14814,6 +14839,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
14814
14839
  ducking: z.ZodOptional<z.ZodObject<{
14815
14840
  ratio: z.ZodOptional<z.ZodNumber>;
14816
14841
  }, z.core.$strip>>;
14842
+ concurrency: z.ZodOptional<z.ZodNumber>;
14817
14843
  }, z.core.$strict>>;
14818
14844
  }, z.core.$strip>>;
14819
14845
  }, z.core.$strict>;
@@ -14947,6 +14973,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
14947
14973
  }>>;
14948
14974
  opacity: z.ZodOptional<z.ZodNumber>;
14949
14975
  }, z.core.$strip>]>>>;
14976
+ concurrency: z.ZodOptional<z.ZodNumber>;
14950
14977
  }, z.core.$strict>>>;
14951
14978
  movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
14952
14979
  provider: z.ZodOptional<z.ZodEnum<{
@@ -15186,6 +15213,8 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
15186
15213
  STYLE: "STYLE";
15187
15214
  }>;
15188
15215
  }, z.core.$strip>>>;
15216
+ concurrency: z.ZodOptional<z.ZodNumber>;
15217
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
15189
15218
  }, z.core.$strip>>>;
15190
15219
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
15191
15220
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -15309,6 +15338,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
15309
15338
  ducking: z.ZodOptional<z.ZodObject<{
15310
15339
  ratio: z.ZodOptional<z.ZodNumber>;
15311
15340
  }, z.core.$strip>>;
15341
+ concurrency: z.ZodOptional<z.ZodNumber>;
15312
15342
  }, z.core.$strict>>;
15313
15343
  }, z.core.$strip>>;
15314
15344
  filename: z.ZodString;
@@ -393,6 +393,7 @@ export const mulmoImageParamsSchema = mulmoBeatImageParamsSchema
393
393
  .extend({
394
394
  images: mulmoImageParamsImagesSchema.optional(),
395
395
  backgroundImage: backgroundImageSchema,
396
+ concurrency: z.number().int().positive().optional().describe("Max concurrent image generation requests"),
396
397
  })
397
398
  .strict();
398
399
  export const textSlideParamsSchema = z
@@ -436,6 +437,7 @@ export const audioParamsSchema = z
436
437
  })
437
438
  .optional()
438
439
  .describe("Auto-reduce movie audio when TTS is playing"),
440
+ concurrency: z.number().int().positive().optional().describe("Max concurrent TTS generation requests"),
439
441
  })
440
442
  .strict();
441
443
  export const htmlPromptParamsSchema = z
@@ -496,6 +498,8 @@ export const mulmoMovieParamsSchema = z.object({
496
498
  .array(movieReferenceImageSchema)
497
499
  .optional()
498
500
  .describe("Style/asset reference images (Veo 3.1). Mutually exclusive with imageName/lastFrameImageName"),
501
+ concurrency: z.number().int().positive().optional().describe("Max concurrent movie generation requests"),
502
+ generateAudio: z.boolean().optional().describe("Request audio generation in the video (model-dependent)"),
499
503
  });
500
504
  export const mulmoBeatSchema = z
501
505
  .object({
@@ -115,6 +115,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
115
115
  size?: "contain" | "cover" | "fill" | "auto" | undefined;
116
116
  opacity?: number | undefined;
117
117
  } | null | undefined;
118
+ concurrency?: number | undefined;
118
119
  };
119
120
  movieParams: {
120
121
  provider?: string | undefined;
@@ -280,6 +281,8 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
280
281
  imageName: string;
281
282
  referenceType: "ASSET" | "STYLE";
282
283
  }[] | undefined;
284
+ concurrency?: number | undefined;
285
+ generateAudio?: boolean | undefined;
283
286
  };
284
287
  soundEffectParams: {
285
288
  provider?: string | undefined;
@@ -308,6 +311,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
308
311
  ducking?: {
309
312
  ratio?: number | undefined;
310
313
  } | undefined;
314
+ concurrency?: number | undefined;
311
315
  };
312
316
  lang: string;
313
317
  beats: {
@@ -2018,6 +2022,8 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
2018
2022
  imageName: string;
2019
2023
  referenceType: "ASSET" | "STYLE";
2020
2024
  }[] | undefined;
2025
+ concurrency?: number | undefined;
2026
+ generateAudio?: boolean | undefined;
2021
2027
  speed?: number | undefined;
2022
2028
  } | undefined;
2023
2029
  soundEffectParams?: {
@@ -2347,6 +2353,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
2347
2353
  size?: "contain" | "cover" | "fill" | "auto" | undefined;
2348
2354
  opacity?: number | undefined;
2349
2355
  } | null | undefined;
2356
+ concurrency?: number | undefined;
2350
2357
  };
2351
2358
  movieParams: {
2352
2359
  provider?: string | undefined;
@@ -2512,6 +2519,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
2512
2519
  imageName: string;
2513
2520
  referenceType: "ASSET" | "STYLE";
2514
2521
  }[] | undefined;
2522
+ concurrency?: number | undefined;
2523
+ generateAudio?: boolean | undefined;
2515
2524
  };
2516
2525
  soundEffectParams: {
2517
2526
  provider?: string | undefined;
@@ -2540,6 +2549,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
2540
2549
  ducking?: {
2541
2550
  ratio?: number | undefined;
2542
2551
  } | undefined;
2552
+ concurrency?: number | undefined;
2543
2553
  };
2544
2554
  lang: string;
2545
2555
  beats: {
@@ -4250,6 +4260,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4250
4260
  imageName: string;
4251
4261
  referenceType: "ASSET" | "STYLE";
4252
4262
  }[] | undefined;
4263
+ concurrency?: number | undefined;
4264
+ generateAudio?: boolean | undefined;
4253
4265
  speed?: number | undefined;
4254
4266
  } | undefined;
4255
4267
  soundEffectParams?: {
@@ -4586,6 +4598,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4586
4598
  size?: "contain" | "cover" | "fill" | "auto" | undefined;
4587
4599
  opacity?: number | undefined;
4588
4600
  } | null | undefined;
4601
+ concurrency?: number | undefined;
4589
4602
  };
4590
4603
  movieParams: {
4591
4604
  provider?: string | undefined;
@@ -4751,6 +4764,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4751
4764
  imageName: string;
4752
4765
  referenceType: "ASSET" | "STYLE";
4753
4766
  }[] | undefined;
4767
+ concurrency?: number | undefined;
4768
+ generateAudio?: boolean | undefined;
4754
4769
  };
4755
4770
  soundEffectParams: {
4756
4771
  provider?: string | undefined;
@@ -4779,6 +4794,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4779
4794
  ducking?: {
4780
4795
  ratio?: number | undefined;
4781
4796
  } | undefined;
4797
+ concurrency?: number | undefined;
4782
4798
  };
4783
4799
  lipSyncParams?: {
4784
4800
  provider?: string | undefined;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.6.6",
3
+ "version": "2.6.8",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -24,8 +24,9 @@
24
24
  }
25
25
  },
26
26
  "resolutions": {
27
- "minimatch": "^10.2.4",
28
- "yauzl": "^3.2.1"
27
+ "minimatch": "^10.2.5",
28
+ "tar": "7.5.13",
29
+ "yauzl": "^3.3.0"
29
30
  },
30
31
  "bin": {
31
32
  "mulmo": "lib/cli/bin.js",
@@ -88,7 +89,7 @@
88
89
  "homepage": "https://github.com/receptron/mulmocast-cli#readme",
89
90
  "dependencies": {
90
91
  "@google-cloud/text-to-speech": "^6.4.0",
91
- "@google/genai": "^1.48.0",
92
+ "@google/genai": "^1.50.1",
92
93
  "@graphai/anthropic_agent": "^2.0.12",
93
94
  "@graphai/browserless_agent": "^2.0.2",
94
95
  "@graphai/gemini_agent": "^2.0.5",
@@ -98,21 +99,21 @@
98
99
  "@graphai/stream_agent_filter": "^2.0.3",
99
100
  "@graphai/vanilla": "^2.0.12",
100
101
  "@graphai/vanilla_node_agents": "^2.0.4",
101
- "@inquirer/input": "^5.0.10",
102
- "@inquirer/select": "^5.1.2",
102
+ "@inquirer/input": "^5.0.12",
103
+ "@inquirer/select": "^5.1.4",
103
104
  "@modelcontextprotocol/sdk": "^1.29.0",
104
105
  "@mozilla/readability": "^0.6.0",
105
106
  "@tavily/core": "^0.5.11",
106
107
  "archiver": "^7.0.1",
107
108
  "clipboardy": "^5.3.1",
108
- "dotenv": "^17.4.0",
109
+ "dotenv": "^17.4.2",
109
110
  "fluent-ffmpeg": "^2.1.3",
110
111
  "graphai": "^2.0.16",
111
- "jsdom": "^29.0.1",
112
- "marked": "^17.0.5",
112
+ "jsdom": "^29.0.2",
113
+ "marked": "^18.0.2",
113
114
  "mulmocast-vision": "^1.0.9",
114
115
  "ora": "^9.3.0",
115
- "puppeteer": "^24.40.0",
116
+ "puppeteer": "^24.41.0",
116
117
  "replicate": "^1.4.0",
117
118
  "yaml": "^2.8.3",
118
119
  "yargs": "^18.0.0",
@@ -126,16 +127,16 @@
126
127
  "@types/jsdom": "^28.0.1",
127
128
  "@types/yargs": "^17.0.35",
128
129
  "cross-env": "^10.1.0",
129
- "eslint": "^10.1.0",
130
+ "eslint": "^10.2.1",
130
131
  "eslint-config-prettier": "^10.1.8",
131
132
  "eslint-plugin-import": "^2.32.0",
132
133
  "eslint-plugin-prettier": "^5.5.5",
133
- "eslint-plugin-sonarjs": "^4.0.2",
134
- "globals": "^17.4.0",
135
- "prettier": "^3.8.1",
134
+ "eslint-plugin-sonarjs": "^4.0.3",
135
+ "globals": "^17.5.0",
136
+ "prettier": "^3.8.3",
136
137
  "tsx": "^4.21.0",
137
- "typescript": "6.0.2",
138
- "typescript-eslint": "^8.58.0"
138
+ "typescript": "6.0.3",
139
+ "typescript-eslint": "^8.58.1"
139
140
  },
140
141
  "engines": {
141
142
  "node": ">=22.0.0"
@@ -0,0 +1,33 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "title": "generateAudio Test",
4
+ "lang": "en",
5
+ "canvasSize": { "width": 1280, "height": 720 },
6
+ "audioParams": {
7
+ "bgmVolume": 0,
8
+ "suppressSpeech": true
9
+ },
10
+ "movieParams": {
11
+ "provider": "replicate",
12
+ "model": "kwaivgi/kling-v3-video",
13
+ "concurrency": 1
14
+ },
15
+ "beats": [
16
+ {
17
+ "text": "Audio ON (generateAudio: true)",
18
+ "moviePrompt": "A cat meowing and walking across a wooden floor, indoor scene",
19
+ "movieParams": {
20
+ "generateAudio": true
21
+ },
22
+ "duration": 3
23
+ },
24
+ {
25
+ "text": "Audio OFF (generateAudio: false)",
26
+ "moviePrompt": "A cat meowing and walking across a wooden floor, indoor scene",
27
+ "movieParams": {
28
+ "generateAudio": false
29
+ },
30
+ "duration": 3
31
+ }
32
+ ]
33
+ }
@@ -0,0 +1,61 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "movieParams": {
4
+ "provider": "replicate",
5
+ "model": "bytedance/seedance-2.0"
6
+ },
7
+ "audioParams": {
8
+ "bgmVolume": 0
9
+ },
10
+ "captionParams": {
11
+ "lang": "en"
12
+ },
13
+ "lang": "en",
14
+ "beats": [
15
+ {
16
+ "text": "Comparing the bytedance seedance series with a fast-cut dance prompt",
17
+ "image": {
18
+ "type": "textSlide",
19
+ "slide": {
20
+ "title": "PROMPT: high-energy dance montage, quick cuts synced to the beat, neon-lit urban street"
21
+ }
22
+ }
23
+ },
24
+ {
25
+ "id": "seedance-1-lite",
26
+ "text": "bytedance/seedance-1-lite",
27
+ "duration": 5,
28
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
29
+ "movieParams": {
30
+ "model": "bytedance/seedance-1-lite"
31
+ }
32
+ },
33
+ {
34
+ "id": "seedance-1-pro",
35
+ "text": "bytedance/seedance-1-pro",
36
+ "duration": 5,
37
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
38
+ "movieParams": {
39
+ "model": "bytedance/seedance-1-pro"
40
+ }
41
+ },
42
+ {
43
+ "id": "seedance-2.0",
44
+ "text": "bytedance/seedance-2.0",
45
+ "duration": 10,
46
+ "moviePrompt": "A high-energy 10-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
47
+ "movieParams": {
48
+ "model": "bytedance/seedance-2.0"
49
+ }
50
+ },
51
+ {
52
+ "id": "seedance-2.0-fast",
53
+ "text": "bytedance/seedance-2.0-fast",
54
+ "duration": 8,
55
+ "moviePrompt": "A high-energy 8-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
56
+ "movieParams": {
57
+ "model": "bytedance/seedance-2.0-fast"
58
+ }
59
+ }
60
+ ]
61
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "movieParams": {
4
+ "provider": "replicate",
5
+ "model": "bytedance/seedance-2.0"
6
+ },
7
+ "audioParams": {
8
+ "bgmVolume": 0
9
+ },
10
+ "captionParams": {
11
+ "lang": "en"
12
+ },
13
+ "lang": "en",
14
+ "beats": [
15
+ {
16
+ "text": "Comparing the bytedance seedance series with a fast-cut dance prompt",
17
+ "image": {
18
+ "type": "textSlide",
19
+ "slide": {
20
+ "title": "PROMPT: high-energy dance montage, quick cuts synced to the beat, neon-lit urban street"
21
+ }
22
+ }
23
+ },
24
+ {
25
+ "id": "seedance-2.0",
26
+ "text": "bytedance/seedance-2.0",
27
+ "duration": 5,
28
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
29
+ "movieParams": {
30
+ "model": "bytedance/seedance-2.0"
31
+ }
32
+ },
33
+ {
34
+ "id": "seedance-2.0-fast",
35
+ "text": "bytedance/seedance-2.0-fast",
36
+ "duration": 5,
37
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
38
+ "movieParams": {
39
+ "model": "bytedance/seedance-2.0-fast"
40
+ }
41
+ }
42
+ ]
43
+ }