mulmocast 2.6.7 → 2.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,6 +121,7 @@ const generateReferenceMovie = async (inputs) => {
121
121
  params: {
122
122
  model: movieAgentInfo.movieParams.model,
123
123
  canvasSize: context.presentationStyle.canvasSize,
124
+ generateAudio: movieAgentInfo.movieParams.generateAudio,
124
125
  },
125
126
  },
126
127
  },
@@ -324,6 +324,7 @@ export declare const beat_graph_data: {
324
324
  canvasSize: string;
325
325
  vertexai_project: string;
326
326
  vertexai_location: string;
327
+ generateAudio: string;
327
328
  };
328
329
  };
329
330
  defaultValue: {};
@@ -194,6 +194,7 @@ export const beat_graph_data = {
194
194
  canvasSize: ":context.presentationStyle.canvasSize",
195
195
  vertexai_project: ":preprocessor.movieAgentInfo.movieParams.vertexai_project",
196
196
  vertexai_location: ":preprocessor.movieAgentInfo.movieParams.vertexai_location",
197
+ generateAudio: ":preprocessor.movieAgentInfo.movieParams.generateAudio",
197
198
  },
198
199
  },
199
200
  defaultValue: {},
@@ -1,10 +1,10 @@
1
1
  import { readFileSync, writeFileSync } from "fs";
2
2
  import { GraphAILogger, sleep } from "graphai";
3
3
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
4
- import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, hasCause, } from "../utils/error_cause.js";
4
+ import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, hasCause, } from "../utils/error_cause.js";
5
5
  import { getAspectRatio } from "../utils/utils.js";
6
6
  import { ASPECT_RATIOS } from "../types/const.js";
7
- import { getModelDuration, provider2MovieAgent } from "../types/provider2agent.js";
7
+ import { getModelDuration, provider2MovieAgent, AUDIO_MODE_NEVER, AUDIO_MODE_ALWAYS } from "../types/provider2agent.js";
8
8
  const pollUntilDone = async (ai, operation) => {
9
9
  const response = { operation };
10
10
  while (!response.operation.done) {
@@ -154,6 +154,18 @@ export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
154
154
  cause: agentGenerationError("movieGenAIAgent", imageAction, videoDurationTarget),
155
155
  });
156
156
  }
157
+ // Check generateAudio compatibility (Google API has no toggle)
158
+ if (params.generateAudio !== undefined) {
159
+ const audio = provider2MovieAgent.google.modelParams[model]?.audio ?? { mode: AUDIO_MODE_NEVER };
160
+ if (audio.mode === AUDIO_MODE_NEVER && params.generateAudio === true) {
161
+ throw new Error(`Model ${model} does not support audio generation`, {
162
+ cause: agentGenerationError("movieGenAIAgent", imageAction, unsupportedModelTarget),
163
+ });
164
+ }
165
+ else if (audio.mode === AUDIO_MODE_ALWAYS && params.generateAudio === false) {
166
+ GraphAILogger.warn(`movieGenAIAgent: model ${model} always generates audio — ignoring generateAudio=false`);
167
+ }
168
+ }
157
169
  const isVertexAI = !!params.vertexai_project;
158
170
  const ai = isVertexAI
159
171
  ? new GoogleGenAI({
@@ -1,8 +1,8 @@
1
1
  import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
- import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
- import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
4
+ import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, hasCause, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
+ import { provider2MovieAgent, getModelDuration, AUDIO_MODE_OPTIONAL, AUDIO_MODE_NEVER, AUDIO_MODE_ALWAYS } from "../types/provider2agent.js";
6
6
  function replicate_get_videoUrl(output) {
7
7
  if (typeof output === "string")
8
8
  return output;
@@ -10,7 +10,7 @@ function replicate_get_videoUrl(output) {
10
10
  return output.url();
11
11
  return undefined;
12
12
  }
13
- async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
13
+ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration, generateAudio) {
14
14
  const replicate = new Replicate({
15
15
  auth: apiKey,
16
16
  });
@@ -77,6 +77,21 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
77
77
  GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support lastFrame — ignoring lastFrameImageName`);
78
78
  }
79
79
  }
80
+ // Add generate_audio if the model supports it
81
+ const audio = provider2MovieAgent.replicate.modelParams[model].audio;
82
+ if (generateAudio !== undefined) {
83
+ if (audio.mode === AUDIO_MODE_OPTIONAL) {
84
+ input[audio.param] = generateAudio;
85
+ }
86
+ else if (audio.mode === AUDIO_MODE_NEVER && generateAudio === true) {
87
+ throw new Error(`Model ${model} does not support audio generation`, {
88
+ cause: agentGenerationError("movieReplicateAgent", imageAction, unsupportedModelTarget),
89
+ });
90
+ }
91
+ else if (audio.mode === AUDIO_MODE_ALWAYS && generateAudio === false) {
92
+ GraphAILogger.warn(`movieReplicateAgent: model ${model} always generates audio — ignoring generateAudio=false`);
93
+ }
94
+ }
80
95
  try {
81
96
  const output = await replicate.run(model, { input });
82
97
  // Download the generated video
@@ -134,12 +149,15 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
134
149
  });
135
150
  }
136
151
  try {
137
- const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
152
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration, params.generateAudio);
138
153
  if (buffer) {
139
154
  return { buffer };
140
155
  }
141
156
  }
142
157
  catch (error) {
158
+ if (hasCause(error)) {
159
+ throw error;
160
+ }
143
161
  GraphAILogger.info("Failed to generate movie:", error.message);
144
162
  }
145
163
  throw new Error("ERROR: generateMovie returned undefined", {
@@ -2,6 +2,10 @@ import { GraphAILogger } from "graphai";
2
2
  import * as textToSpeech from "@google-cloud/text-to-speech";
3
3
  import { agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
4
4
  const client = new textToSpeech.TextToSpeechClient();
5
+ // Hard cap so a hung Google TTS RPC can't pin a beat indefinitely.
6
+ // Most synthesizeSpeech calls return in seconds; 60s leaves headroom
7
+ // for long inputs and slow regions while still failing loud.
8
+ const SYNTHESIZE_TIMEOUT_MS = 60_000;
5
9
  const getPrompt = (text, instructions) => {
6
10
  if (instructions) {
7
11
  return `### DIRECTOR'S NOTES\n${instructions}\n\n#### TRANSCRIPT\n${text}`;
@@ -37,7 +41,7 @@ export const ttsGoogleAgent = async ({ namedInputs, params }) => {
37
41
  };
38
42
  try {
39
43
  // Call the Text-to-Speech API
40
- const [response] = await client.synthesizeSpeech(request);
44
+ const [response] = await client.synthesizeSpeech(request, { timeout: SYNTHESIZE_TIMEOUT_MS });
41
45
  return { buffer: response.audioContent };
42
46
  }
43
47
  catch (e) {
@@ -47,11 +51,23 @@ export const ttsGoogleAgent = async ({ namedInputs, params }) => {
47
51
  };
48
52
  }
49
53
  GraphAILogger.info(e);
50
- throw new Error("TTS Google Error", {
54
+ // gRPC errors from @google-cloud/text-to-speech are ServiceError
55
+ // (extends Error with a `details` string). Surface that human-readable
56
+ // text so callers don't see only "TTS Google Error".
57
+ throw new Error(`TTS Google Error: ${grpcErrorDetail(e)}`, {
51
58
  cause: agentGenerationError("ttsGoogleAgent", audioAction, audioFileTarget),
52
59
  });
53
60
  }
54
61
  };
62
+ const grpcErrorDetail = (e) => {
63
+ if (e instanceof Error) {
64
+ const details = e.details;
65
+ if (typeof details === "string" && details)
66
+ return details;
67
+ return e.message;
68
+ }
69
+ return String(e);
70
+ };
55
71
  const ttsGoogleAgentInfo = {
56
72
  name: "ttsGoogleAgent",
57
73
  agent: ttsGoogleAgent,
@@ -180,6 +180,7 @@ export declare const MulmoPresentationStyleMethods: {
180
180
  referenceType: "ASSET" | "STYLE";
181
181
  }[] | undefined;
182
182
  concurrency?: number | undefined;
183
+ generateAudio?: boolean | undefined;
183
184
  speed?: number | undefined;
184
185
  };
185
186
  keyName: string;
@@ -84,6 +84,7 @@ export type GoogleMovieAgentParams = ImageAgentParams & {
84
84
  duration?: number;
85
85
  vertexai_project?: string;
86
86
  vertexai_location?: string;
87
+ generateAudio?: boolean;
87
88
  };
88
89
  export type ReplicateMovieAgentParams = {
89
90
  model: `${string}/${string}` | undefined;
@@ -92,6 +93,7 @@ export type ReplicateMovieAgentParams = {
92
93
  height: number;
93
94
  };
94
95
  duration?: number;
96
+ generateAudio?: boolean;
95
97
  };
96
98
  export type ReplicateSoundEffectAgentParams = {
97
99
  model: `${string}/${string}` | undefined;
@@ -70,32 +70,47 @@ export declare const provider2ImageAgent: {
70
70
  };
71
71
  };
72
72
  export type ReplicateModel = `${string}/${string}`;
73
+ export declare const AUDIO_MODE_NEVER: "never";
74
+ export declare const AUDIO_MODE_ALWAYS: "always";
75
+ export declare const AUDIO_MODE_OPTIONAL: "optional";
76
+ type MovieAudioSpec = {
77
+ mode: typeof AUDIO_MODE_NEVER;
78
+ } | {
79
+ mode: typeof AUDIO_MODE_ALWAYS;
80
+ } | {
81
+ mode: typeof AUDIO_MODE_OPTIONAL;
82
+ param: string;
83
+ };
84
+ type ReplicateMovieModelParams = {
85
+ durations: number[];
86
+ start_image: string | undefined;
87
+ last_image?: string;
88
+ reference_images_param?: string;
89
+ audio: MovieAudioSpec;
90
+ price_per_sec: number;
91
+ };
92
+ type GoogleMovieModelParams = {
93
+ durations: number[];
94
+ supportsDuration: boolean;
95
+ supportsLastFrame: boolean;
96
+ supportsReferenceImages: boolean;
97
+ supportsPersonGeneration: boolean;
98
+ audio: MovieAudioSpec;
99
+ };
73
100
  export declare const provider2MovieAgent: {
74
101
  replicate: {
75
102
  agentName: string;
76
103
  defaultModel: ReplicateModel;
77
104
  keyName: string;
78
105
  models: string[];
79
- modelParams: Record<ReplicateModel, {
80
- durations: number[];
81
- start_image: string | undefined;
82
- last_image?: string;
83
- reference_images_param?: string;
84
- price_per_sec: number;
85
- }>;
106
+ modelParams: Record<ReplicateModel, ReplicateMovieModelParams>;
86
107
  };
87
108
  google: {
88
109
  agentName: string;
89
110
  defaultModel: string;
90
111
  models: string[];
91
112
  keyName: string;
92
- modelParams: Record<string, {
93
- durations: number[];
94
- supportsDuration: boolean;
95
- supportsLastFrame: boolean;
96
- supportsReferenceImages: boolean;
97
- supportsPersonGeneration: boolean;
98
- }>;
113
+ modelParams: Record<string, GoogleMovieModelParams>;
99
114
  };
100
115
  mock: {
101
116
  agentName: string;
@@ -182,3 +197,4 @@ export declare const llm: (keyof typeof provider2LLMAgent)[];
182
197
  export type LLM = keyof typeof provider2LLMAgent;
183
198
  export declare const htmlLLMProvider: string[];
184
199
  export declare const getModelDuration: (provider: keyof typeof provider2MovieAgent, model: string, movieDuration?: number) => number | undefined;
200
+ export {};
@@ -44,7 +44,7 @@ export const provider2TTSAgent = {
44
44
  models: ["mock-model"],
45
45
  },
46
46
  };
47
- export const gptImages = ["gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini"];
47
+ export const gptImages = ["gpt-image-2", "gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini"];
48
48
  export const provider2ImageAgent = {
49
49
  openai: {
50
50
  agentName: "imageOpenaiAgent",
@@ -79,6 +79,9 @@ export const provider2ImageAgent = {
79
79
  keyName: "",
80
80
  },
81
81
  };
82
+ export const AUDIO_MODE_NEVER = "never";
83
+ export const AUDIO_MODE_ALWAYS = "always";
84
+ export const AUDIO_MODE_OPTIONAL = "optional";
82
85
  export const provider2MovieAgent = {
83
86
  replicate: {
84
87
  agentName: "movieReplicateAgent",
@@ -87,6 +90,8 @@ export const provider2MovieAgent = {
87
90
  models: [
88
91
  "bytedance/seedance-1-lite",
89
92
  "bytedance/seedance-1-pro",
93
+ "bytedance/seedance-2.0",
94
+ "bytedance/seedance-2.0-fast",
90
95
  "kwaivgi/kling-v1.6-pro",
91
96
  "kwaivgi/kling-v2.1",
92
97
  "kwaivgi/kling-v2.1-master",
@@ -113,37 +118,58 @@ export const provider2MovieAgent = {
113
118
  durations: [5, 10],
114
119
  start_image: "image",
115
120
  last_image: "last_frame_image",
121
+ audio: { mode: AUDIO_MODE_NEVER },
116
122
  price_per_sec: 0.036, // in USD
117
123
  },
118
124
  "bytedance/seedance-1-pro": {
119
125
  durations: [5, 10],
120
126
  start_image: "image",
121
127
  last_image: "last_frame_image",
128
+ audio: { mode: AUDIO_MODE_NEVER },
122
129
  price_per_sec: 0.15,
123
130
  },
131
+ "bytedance/seedance-2.0": {
132
+ durations: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
133
+ start_image: "image",
134
+ last_image: "last_frame_image",
135
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
136
+ price_per_sec: 0.29,
137
+ },
138
+ "bytedance/seedance-2.0-fast": {
139
+ durations: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
140
+ start_image: "image",
141
+ last_image: "last_frame_image",
142
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
143
+ price_per_sec: 0.22,
144
+ },
124
145
  "kwaivgi/kling-v1.6-pro": {
125
146
  durations: [5, 10],
126
147
  start_image: "start_image",
148
+ audio: { mode: AUDIO_MODE_NEVER },
127
149
  price_per_sec: 0.095,
128
150
  },
129
151
  "kwaivgi/kling-v2.1": {
130
152
  durations: [5, 10],
131
153
  start_image: "start_image",
154
+ audio: { mode: AUDIO_MODE_NEVER },
132
155
  price_per_sec: 0.05,
133
156
  },
134
157
  "kwaivgi/kling-v2.1-master": {
135
158
  durations: [5, 10],
136
159
  start_image: "start_image",
160
+ audio: { mode: AUDIO_MODE_NEVER },
137
161
  price_per_sec: 0.28,
138
162
  },
139
163
  "google/veo-2": {
140
164
  durations: [5, 6, 7, 8],
141
165
  start_image: "image",
166
+ audio: { mode: AUDIO_MODE_NEVER },
142
167
  price_per_sec: 0.5,
143
168
  },
144
169
  "google/veo-3": {
145
170
  durations: [8],
146
171
  start_image: "image",
172
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
147
173
  price_per_sec: 0.75,
148
174
  },
149
175
  "google/veo-3.1": {
@@ -151,71 +177,84 @@ export const provider2MovieAgent = {
151
177
  start_image: "image",
152
178
  last_image: "last_frame_image",
153
179
  reference_images_param: "reference_images",
180
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
154
181
  price_per_sec: 0.75,
155
182
  },
156
183
  "google/veo-3.1-fast": {
157
184
  durations: [4, 6, 8],
158
185
  start_image: "image",
159
186
  last_image: "last_frame_image",
187
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
160
188
  price_per_sec: 0.4,
161
189
  },
162
190
  "google/veo-3.1-lite": {
163
191
  durations: [4, 6, 8],
164
192
  start_image: "image",
165
193
  last_image: "last_frame",
194
+ audio: { mode: AUDIO_MODE_NEVER },
166
195
  price_per_sec: 0.05,
167
196
  },
168
197
  "google/veo-3-fast": {
169
198
  durations: [8],
170
199
  start_image: "image",
200
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
171
201
  price_per_sec: 0.4,
172
202
  },
173
203
  "minimax/video-01": {
174
204
  durations: [6],
175
205
  start_image: "first_frame_image",
206
+ audio: { mode: AUDIO_MODE_NEVER },
176
207
  price_per_sec: 0.5,
177
208
  },
178
209
  "minimax/hailuo-02": {
179
210
  durations: [6], // NOTE: 10 for only 720p
180
211
  start_image: "first_frame_image",
181
212
  last_image: "end_image",
213
+ audio: { mode: AUDIO_MODE_NEVER },
182
214
  price_per_sec: 0.08,
183
215
  },
184
216
  "minimax/hailuo-02-fast": {
185
217
  durations: [6, 10], // NOTE: 512P
186
218
  start_image: "first_frame_image",
219
+ audio: { mode: AUDIO_MODE_NEVER },
187
220
  price_per_sec: 0.0166,
188
221
  },
189
222
  "pixverse/pixverse-v4.5": {
190
223
  durations: [5, 8],
191
224
  start_image: "image",
192
225
  last_image: "last_frame_image",
226
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "sound_effect_switch" },
193
227
  price_per_sec: 0.12,
194
228
  },
195
229
  "wan-video/wan-2.2-i2v-fast": {
196
230
  durations: [5],
197
231
  start_image: "image",
232
+ audio: { mode: AUDIO_MODE_NEVER },
198
233
  price_per_sec: 0.012,
199
234
  },
200
235
  "wan-video/wan-2.2-t2v-fast": {
201
236
  durations: [5],
202
237
  start_image: undefined,
238
+ audio: { mode: AUDIO_MODE_NEVER },
203
239
  price_per_sec: 0.012,
204
240
  },
205
241
  "xai/grok-imagine-video": {
206
242
  durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
207
243
  start_image: "image",
244
+ audio: { mode: AUDIO_MODE_NEVER },
208
245
  price_per_sec: 0.08,
209
246
  },
210
247
  "xai/grok-imagine-r2v": {
211
248
  durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
212
249
  start_image: undefined,
213
250
  reference_images_param: "reference_images",
251
+ audio: { mode: AUDIO_MODE_NEVER },
214
252
  price_per_sec: 0.08,
215
253
  },
216
254
  "runwayml/gen-4.5": {
217
255
  durations: [5, 10],
218
256
  start_image: "image",
257
+ audio: { mode: AUDIO_MODE_NEVER },
219
258
  price_per_sec: 0.25,
220
259
  },
221
260
  "kwaivgi/kling-v3-omni-video": {
@@ -223,6 +262,7 @@ export const provider2MovieAgent = {
223
262
  start_image: "start_image",
224
263
  last_image: "end_image",
225
264
  reference_images_param: "reference_images",
265
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
226
266
  price_per_sec: 0.3,
227
267
  },
228
268
  "kwaivgi/kling-v3-video": {
@@ -230,6 +270,7 @@ export const provider2MovieAgent = {
230
270
  start_image: "start_image",
231
271
  last_image: "end_image",
232
272
  reference_images_param: "reference_images",
273
+ audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
233
274
  price_per_sec: 0.3,
234
275
  },
235
276
  },
@@ -246,6 +287,7 @@ export const provider2MovieAgent = {
246
287
  supportsLastFrame: true,
247
288
  supportsReferenceImages: false,
248
289
  supportsPersonGeneration: false,
290
+ audio: { mode: AUDIO_MODE_ALWAYS },
249
291
  },
250
292
  "veo-3.1-generate-preview": {
251
293
  durations: [4, 6, 8],
@@ -253,6 +295,7 @@ export const provider2MovieAgent = {
253
295
  supportsLastFrame: true,
254
296
  supportsReferenceImages: true,
255
297
  supportsPersonGeneration: false,
298
+ audio: { mode: AUDIO_MODE_ALWAYS },
256
299
  },
257
300
  "veo-3.0-generate-001": {
258
301
  durations: [8],
@@ -260,6 +303,7 @@ export const provider2MovieAgent = {
260
303
  supportsLastFrame: false,
261
304
  supportsReferenceImages: false,
262
305
  supportsPersonGeneration: false,
306
+ audio: { mode: AUDIO_MODE_ALWAYS },
263
307
  },
264
308
  "veo-2.0-generate-001": {
265
309
  durations: [5, 6, 8],
@@ -267,6 +311,7 @@ export const provider2MovieAgent = {
267
311
  supportsLastFrame: false, // Vertex AI only
268
312
  supportsReferenceImages: false,
269
313
  supportsPersonGeneration: true,
314
+ audio: { mode: AUDIO_MODE_NEVER },
270
315
  },
271
316
  },
272
317
  },
@@ -3691,6 +3691,7 @@ export declare const mulmoMovieParamsSchema: z.ZodObject<{
3691
3691
  }>;
3692
3692
  }, z.core.$strip>>>;
3693
3693
  concurrency: z.ZodOptional<z.ZodNumber>;
3694
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
3694
3695
  }, z.core.$strip>;
3695
3696
  export declare const mulmoBeatSchema: z.ZodObject<{
3696
3697
  speaker: z.ZodOptional<z.ZodString>;
@@ -6585,6 +6586,7 @@ export declare const mulmoBeatSchema: z.ZodObject<{
6585
6586
  }>;
6586
6587
  }, z.core.$strip>>>;
6587
6588
  concurrency: z.ZodOptional<z.ZodNumber>;
6589
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
6588
6590
  speed: z.ZodOptional<z.ZodNumber>;
6589
6591
  }, z.core.$strip>>;
6590
6592
  soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -7074,6 +7076,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
7074
7076
  }>;
7075
7077
  }, z.core.$strip>>>;
7076
7078
  concurrency: z.ZodOptional<z.ZodNumber>;
7079
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
7077
7080
  }, z.core.$strip>>>;
7078
7081
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
7079
7082
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -7578,6 +7581,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
7578
7581
  }>;
7579
7582
  }, z.core.$strip>>>;
7580
7583
  concurrency: z.ZodOptional<z.ZodNumber>;
7584
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
7581
7585
  }, z.core.$strip>>>;
7582
7586
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
7583
7587
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -10611,6 +10615,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
10611
10615
  }>;
10612
10616
  }, z.core.$strip>>>;
10613
10617
  concurrency: z.ZodOptional<z.ZodNumber>;
10618
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
10614
10619
  speed: z.ZodOptional<z.ZodNumber>;
10615
10620
  }, z.core.$strip>>;
10616
10621
  soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -11175,6 +11180,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
11175
11180
  }>;
11176
11181
  }, z.core.$strip>>>;
11177
11182
  concurrency: z.ZodOptional<z.ZodNumber>;
11183
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
11178
11184
  }, z.core.$strip>>>;
11179
11185
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
11180
11186
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -14208,6 +14214,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
14208
14214
  }>;
14209
14215
  }, z.core.$strip>>>;
14210
14216
  concurrency: z.ZodOptional<z.ZodNumber>;
14217
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
14211
14218
  speed: z.ZodOptional<z.ZodNumber>;
14212
14219
  }, z.core.$strip>>;
14213
14220
  soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -14708,6 +14715,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
14708
14715
  }>;
14709
14716
  }, z.core.$strip>>>;
14710
14717
  concurrency: z.ZodOptional<z.ZodNumber>;
14718
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
14711
14719
  }, z.core.$strip>>>;
14712
14720
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
14713
14721
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -15206,6 +15214,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
15206
15214
  }>;
15207
15215
  }, z.core.$strip>>>;
15208
15216
  concurrency: z.ZodOptional<z.ZodNumber>;
15217
+ generateAudio: z.ZodOptional<z.ZodBoolean>;
15209
15218
  }, z.core.$strip>>>;
15210
15219
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
15211
15220
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -499,6 +499,7 @@ export const mulmoMovieParamsSchema = z.object({
499
499
  .optional()
500
500
  .describe("Style/asset reference images (Veo 3.1). Mutually exclusive with imageName/lastFrameImageName"),
501
501
  concurrency: z.number().int().positive().optional().describe("Max concurrent movie generation requests"),
502
+ generateAudio: z.boolean().optional().describe("Request audio generation in the video (model-dependent)"),
502
503
  });
503
504
  export const mulmoBeatSchema = z
504
505
  .object({
@@ -282,6 +282,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
282
282
  referenceType: "ASSET" | "STYLE";
283
283
  }[] | undefined;
284
284
  concurrency?: number | undefined;
285
+ generateAudio?: boolean | undefined;
285
286
  };
286
287
  soundEffectParams: {
287
288
  provider?: string | undefined;
@@ -2022,6 +2023,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
2022
2023
  referenceType: "ASSET" | "STYLE";
2023
2024
  }[] | undefined;
2024
2025
  concurrency?: number | undefined;
2026
+ generateAudio?: boolean | undefined;
2025
2027
  speed?: number | undefined;
2026
2028
  } | undefined;
2027
2029
  soundEffectParams?: {
@@ -2518,6 +2520,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
2518
2520
  referenceType: "ASSET" | "STYLE";
2519
2521
  }[] | undefined;
2520
2522
  concurrency?: number | undefined;
2523
+ generateAudio?: boolean | undefined;
2521
2524
  };
2522
2525
  soundEffectParams: {
2523
2526
  provider?: string | undefined;
@@ -4258,6 +4261,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4258
4261
  referenceType: "ASSET" | "STYLE";
4259
4262
  }[] | undefined;
4260
4263
  concurrency?: number | undefined;
4264
+ generateAudio?: boolean | undefined;
4261
4265
  speed?: number | undefined;
4262
4266
  } | undefined;
4263
4267
  soundEffectParams?: {
@@ -4761,6 +4765,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4761
4765
  referenceType: "ASSET" | "STYLE";
4762
4766
  }[] | undefined;
4763
4767
  concurrency?: number | undefined;
4768
+ generateAudio?: boolean | undefined;
4764
4769
  };
4765
4770
  soundEffectParams: {
4766
4771
  provider?: string | undefined;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.6.7",
3
+ "version": "2.6.9",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -89,7 +89,7 @@
89
89
  "homepage": "https://github.com/receptron/mulmocast-cli#readme",
90
90
  "dependencies": {
91
91
  "@google-cloud/text-to-speech": "^6.4.0",
92
- "@google/genai": "^1.49.0",
92
+ "@google/genai": "^1.50.1",
93
93
  "@graphai/anthropic_agent": "^2.0.12",
94
94
  "@graphai/browserless_agent": "^2.0.2",
95
95
  "@graphai/gemini_agent": "^2.0.5",
@@ -99,8 +99,8 @@
99
99
  "@graphai/stream_agent_filter": "^2.0.3",
100
100
  "@graphai/vanilla": "^2.0.12",
101
101
  "@graphai/vanilla_node_agents": "^2.0.4",
102
- "@inquirer/input": "^5.0.11",
103
- "@inquirer/select": "^5.1.3",
102
+ "@inquirer/input": "^5.0.12",
103
+ "@inquirer/select": "^5.1.4",
104
104
  "@modelcontextprotocol/sdk": "^1.29.0",
105
105
  "@mozilla/readability": "^0.6.0",
106
106
  "@tavily/core": "^0.5.11",
@@ -109,11 +109,11 @@
109
109
  "dotenv": "^17.4.2",
110
110
  "fluent-ffmpeg": "^2.1.3",
111
111
  "graphai": "^2.0.16",
112
- "jsdom": "^29.0.2",
113
- "marked": "^18.0.0",
112
+ "jsdom": "^29.1.0",
113
+ "marked": "^18.0.2",
114
114
  "mulmocast-vision": "^1.0.9",
115
- "ora": "^9.3.0",
116
- "puppeteer": "^24.40.0",
115
+ "ora": "^9.4.0",
116
+ "puppeteer": "^24.42.0",
117
117
  "replicate": "^1.4.0",
118
118
  "yaml": "^2.8.3",
119
119
  "yargs": "^18.0.0",
@@ -127,16 +127,16 @@
127
127
  "@types/jsdom": "^28.0.1",
128
128
  "@types/yargs": "^17.0.35",
129
129
  "cross-env": "^10.1.0",
130
- "eslint": "^10.2.0",
130
+ "eslint": "^10.2.1",
131
131
  "eslint-config-prettier": "^10.1.8",
132
132
  "eslint-plugin-import": "^2.32.0",
133
133
  "eslint-plugin-prettier": "^5.5.5",
134
- "eslint-plugin-sonarjs": "^4.0.2",
134
+ "eslint-plugin-sonarjs": "^4.0.3",
135
135
  "globals": "^17.5.0",
136
- "prettier": "^3.8.2",
136
+ "prettier": "^3.8.3",
137
137
  "tsx": "^4.21.0",
138
- "typescript": "6.0.2",
139
- "typescript-eslint": "^8.58.1"
138
+ "typescript": "6.0.3",
139
+ "typescript-eslint": "^8.59.1"
140
140
  },
141
141
  "engines": {
142
142
  "node": ">=22.0.0"
@@ -106,7 +106,7 @@ Language setting tests
106
106
  Provider-specific feature tests
107
107
 
108
108
  - [**test_hello_google.json**](./test_hello_google.json) - Google TTS専用テスト / Google TTS specific test
109
- - [**gpt.json**](./gpt.json) - GPTモデルテスト / GPT model test
109
+ - [**test_gpt_image.json**](./test_gpt_image.json) - GPT image model test
110
110
  - [**mulmo_story.json**](./mulmo_story.json) - ストーリー形式テスト / Story format test
111
111
  - [**nano_banana.json**](./nano_banana.json) - カスタムサンプル / Custom sample
112
112
 
@@ -0,0 +1,33 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "title": "generateAudio Test",
4
+ "lang": "en",
5
+ "canvasSize": { "width": 1280, "height": 720 },
6
+ "audioParams": {
7
+ "bgmVolume": 0,
8
+ "suppressSpeech": true
9
+ },
10
+ "movieParams": {
11
+ "provider": "replicate",
12
+ "model": "kwaivgi/kling-v3-video",
13
+ "concurrency": 1
14
+ },
15
+ "beats": [
16
+ {
17
+ "text": "Audio ON (generateAudio: true)",
18
+ "moviePrompt": "A cat meowing and walking across a wooden floor, indoor scene",
19
+ "movieParams": {
20
+ "generateAudio": true
21
+ },
22
+ "duration": 3
23
+ },
24
+ {
25
+ "text": "Audio OFF (generateAudio: false)",
26
+ "moviePrompt": "A cat meowing and walking across a wooden floor, indoor scene",
27
+ "movieParams": {
28
+ "generateAudio": false
29
+ },
30
+ "duration": 3
31
+ }
32
+ ]
33
+ }
@@ -21,6 +21,14 @@
21
21
  }
22
22
  },
23
23
  "beats": [
24
+ {
25
+ "speaker": "Host",
26
+ "text": "How are you?",
27
+ "imagePrompt": "A witch in Harajuku",
28
+ "imageParams": {
29
+ "model": "gpt-image-2"
30
+ }
31
+ },
24
32
  {
25
33
  "speaker": "Host",
26
34
  "text": "How are you?",
@@ -0,0 +1,61 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "movieParams": {
4
+ "provider": "replicate",
5
+ "model": "bytedance/seedance-2.0"
6
+ },
7
+ "audioParams": {
8
+ "bgmVolume": 0
9
+ },
10
+ "captionParams": {
11
+ "lang": "en"
12
+ },
13
+ "lang": "en",
14
+ "beats": [
15
+ {
16
+ "text": "Comparing the bytedance seedance series with a fast-cut dance prompt",
17
+ "image": {
18
+ "type": "textSlide",
19
+ "slide": {
20
+ "title": "PROMPT: high-energy dance montage, quick cuts synced to the beat, neon-lit urban street"
21
+ }
22
+ }
23
+ },
24
+ {
25
+ "id": "seedance-1-lite",
26
+ "text": "bytedance/seedance-1-lite",
27
+ "duration": 5,
28
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
29
+ "movieParams": {
30
+ "model": "bytedance/seedance-1-lite"
31
+ }
32
+ },
33
+ {
34
+ "id": "seedance-1-pro",
35
+ "text": "bytedance/seedance-1-pro",
36
+ "duration": 5,
37
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
38
+ "movieParams": {
39
+ "model": "bytedance/seedance-1-pro"
40
+ }
41
+ },
42
+ {
43
+ "id": "seedance-2.0",
44
+ "text": "bytedance/seedance-2.0",
45
+ "duration": 10,
46
+ "moviePrompt": "A high-energy 10-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
47
+ "movieParams": {
48
+ "model": "bytedance/seedance-2.0"
49
+ }
50
+ },
51
+ {
52
+ "id": "seedance-2.0-fast",
53
+ "text": "bytedance/seedance-2.0-fast",
54
+ "duration": 8,
55
+ "moviePrompt": "A high-energy 8-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
56
+ "movieParams": {
57
+ "model": "bytedance/seedance-2.0-fast"
58
+ }
59
+ }
60
+ ]
61
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "movieParams": {
4
+ "provider": "replicate",
5
+ "model": "bytedance/seedance-2.0"
6
+ },
7
+ "audioParams": {
8
+ "bgmVolume": 0
9
+ },
10
+ "captionParams": {
11
+ "lang": "en"
12
+ },
13
+ "lang": "en",
14
+ "beats": [
15
+ {
16
+ "text": "Comparing the bytedance seedance series with a fast-cut dance prompt",
17
+ "image": {
18
+ "type": "textSlide",
19
+ "slide": {
20
+ "title": "PROMPT: high-energy dance montage, quick cuts synced to the beat, neon-lit urban street"
21
+ }
22
+ }
23
+ },
24
+ {
25
+ "id": "seedance-2.0",
26
+ "text": "bytedance/seedance-2.0",
27
+ "duration": 5,
28
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
29
+ "movieParams": {
30
+ "model": "bytedance/seedance-2.0"
31
+ }
32
+ },
33
+ {
34
+ "id": "seedance-2.0-fast",
35
+ "text": "bytedance/seedance-2.0-fast",
36
+ "duration": 5,
37
+ "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
38
+ "movieParams": {
39
+ "model": "bytedance/seedance-2.0-fast"
40
+ }
41
+ }
42
+ ]
43
+ }