npm - mulmocast - Versions diffs - 2.6.7 → 2.6.9 - Mend

mulmocast 2.6.7 → 2.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/lib/actions/image_references.js +1 -0
package/lib/actions/images.d.ts +1 -0
package/lib/actions/images.js +1 -0
package/lib/agents/movie_genai_agent.js +14 -2
package/lib/agents/movie_replicate_agent.js +22 -4
package/lib/agents/tts_google_agent.js +18 -2
package/lib/methods/mulmo_presentation_style.d.ts +1 -0
package/lib/types/agent.d.ts +2 -0
package/lib/types/provider2agent.d.ts +30 -14
package/lib/types/provider2agent.js +46 -1
package/lib/types/schema.d.ts +9 -0
package/lib/types/schema.js +1 -0
package/lib/utils/context.d.ts +5 -0
package/package.json +13 -13
package/scripts/test/README.md +1 -1
package/scripts/test/test_generate_audio.json +33 -0
package/scripts/test/{gpt.json → test_gpt_image.json} +8 -0
package/scripts/test/test_seedance.json +61 -0
package/scripts/test/test_seedance2_only.json +43 -0

package/lib/actions/image_references.js CHANGED Viewed

@@ -121,6 +121,7 @@ const generateReferenceMovie = async (inputs) => {
                 params: {
                     model: movieAgentInfo.movieParams.model,
                     canvasSize: context.presentationStyle.canvasSize,
+                    generateAudio: movieAgentInfo.movieParams.generateAudio,
                 },
             },
         },

package/lib/actions/images.d.ts CHANGED Viewed

@@ -324,6 +324,7 @@ export declare const beat_graph_data: {
                     canvasSize: string;
                     vertexai_project: string;
                     vertexai_location: string;
+                    generateAudio: string;
                 };
             };
             defaultValue: {};

package/lib/actions/images.js CHANGED Viewed

@@ -194,6 +194,7 @@ export const beat_graph_data = {
                     canvasSize: ":context.presentationStyle.canvasSize",
                     vertexai_project: ":preprocessor.movieAgentInfo.movieParams.vertexai_project",
                     vertexai_location: ":preprocessor.movieAgentInfo.movieParams.vertexai_location",
+                    generateAudio: ":preprocessor.movieAgentInfo.movieParams.generateAudio",
                 },
             },
             defaultValue: {},

package/lib/agents/movie_genai_agent.js CHANGED Viewed

@@ -1,10 +1,10 @@
 import { readFileSync, writeFileSync } from "fs";
 import { GraphAILogger, sleep } from "graphai";
 import { GoogleGenAI, PersonGeneration } from "@google/genai";
-import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, hasCause, } from "../utils/error_cause.js";
+import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, hasCause, } from "../utils/error_cause.js";
 import { getAspectRatio } from "../utils/utils.js";
 import { ASPECT_RATIOS } from "../types/const.js";
-import { getModelDuration, provider2MovieAgent } from "../types/provider2agent.js";
+import { getModelDuration, provider2MovieAgent, AUDIO_MODE_NEVER, AUDIO_MODE_ALWAYS } from "../types/provider2agent.js";
 const pollUntilDone = async (ai, operation) => {
     const response = { operation };
     while (!response.operation.done) {
@@ -154,6 +154,18 @@ export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
                 cause: agentGenerationError("movieGenAIAgent", imageAction, videoDurationTarget),
             });
         }
+        // Check generateAudio compatibility (Google API has no toggle)
+        if (params.generateAudio !== undefined) {
+            const audio = provider2MovieAgent.google.modelParams[model]?.audio ?? { mode: AUDIO_MODE_NEVER };
+            if (audio.mode === AUDIO_MODE_NEVER && params.generateAudio === true) {
+                throw new Error(`Model ${model} does not support audio generation`, {
+                    cause: agentGenerationError("movieGenAIAgent", imageAction, unsupportedModelTarget),
+                });
+            }
+            else if (audio.mode === AUDIO_MODE_ALWAYS && params.generateAudio === false) {
+                GraphAILogger.warn(`movieGenAIAgent: model ${model} always generates audio — ignoring generateAudio=false`);
+            }
+        }
         const isVertexAI = !!params.vertexai_project;
         const ai = isVertexAI
             ? new GoogleGenAI({

package/lib/agents/movie_replicate_agent.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { readFileSync } from "fs";
 import { GraphAILogger } from "graphai";
 import Replicate from "replicate";
-import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
-import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
+import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, hasCause, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
+import { provider2MovieAgent, getModelDuration, AUDIO_MODE_OPTIONAL, AUDIO_MODE_NEVER, AUDIO_MODE_ALWAYS } from "../types/provider2agent.js";
 function replicate_get_videoUrl(output) {
     if (typeof output === "string")
         return output;
@@ -10,7 +10,7 @@ function replicate_get_videoUrl(output) {
         return output.url();
     return undefined;
 }
-async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration) {
+async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration, generateAudio) {
     const replicate = new Replicate({
         auth: apiKey,
     });
@@ -77,6 +77,21 @@ async function generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePat
             GraphAILogger.warn(`movieReplicateAgent: model ${model} does not support lastFrame — ignoring lastFrameImageName`);
         }
     }
+    // Add generate_audio if the model supports it
+    const audio = provider2MovieAgent.replicate.modelParams[model].audio;
+    if (generateAudio !== undefined) {
+        if (audio.mode === AUDIO_MODE_OPTIONAL) {
+            input[audio.param] = generateAudio;
+        }
+        else if (audio.mode === AUDIO_MODE_NEVER && generateAudio === true) {
+            throw new Error(`Model ${model} does not support audio generation`, {
+                cause: agentGenerationError("movieReplicateAgent", imageAction, unsupportedModelTarget),
+            });
+        }
+        else if (audio.mode === AUDIO_MODE_ALWAYS && generateAudio === false) {
+            GraphAILogger.warn(`movieReplicateAgent: model ${model} always generates audio — ignoring generateAudio=false`);
+        }
+    }
     try {
         const output = await replicate.run(model, { input });
         // Download the generated video
@@ -134,12 +149,15 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
         });
     }
     try {
-        const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration);
+        const buffer = await generateMovie(model, apiKey, prompt, imagePath, lastFrameImagePath, referenceImages, aspectRatio, duration, params.generateAudio);
         if (buffer) {
             return { buffer };
         }
     }
     catch (error) {
+        if (hasCause(error)) {
+            throw error;
+        }
         GraphAILogger.info("Failed to generate movie:", error.message);
     }
     throw new Error("ERROR: generateMovie returned undefined", {

package/lib/agents/tts_google_agent.js CHANGED Viewed

@@ -2,6 +2,10 @@ import { GraphAILogger } from "graphai";
 import * as textToSpeech from "@google-cloud/text-to-speech";
 import { agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
 const client = new textToSpeech.TextToSpeechClient();
+// Hard cap so a hung Google TTS RPC can't pin a beat indefinitely.
+// Most synthesizeSpeech calls return in seconds; 60s leaves headroom
+// for long inputs and slow regions while still failing loud.
+const SYNTHESIZE_TIMEOUT_MS = 60_000;
 const getPrompt = (text, instructions) => {
     if (instructions) {
         return `### DIRECTOR'S NOTES\n${instructions}\n\n#### TRANSCRIPT\n${text}`;
@@ -37,7 +41,7 @@ export const ttsGoogleAgent = async ({ namedInputs, params }) => {
     };
     try {
         // Call the Text-to-Speech API
-        const [response] = await client.synthesizeSpeech(request);
+        const [response] = await client.synthesizeSpeech(request, { timeout: SYNTHESIZE_TIMEOUT_MS });
         return { buffer: response.audioContent };
     }
     catch (e) {
@@ -47,11 +51,23 @@ export const ttsGoogleAgent = async ({ namedInputs, params }) => {
             };
         }
         GraphAILogger.info(e);
-        throw new Error("TTS Google Error", {
+        // gRPC errors from @google-cloud/text-to-speech are ServiceError
+        // (extends Error with a `details` string). Surface that human-readable
+        // text so callers don't see only "TTS Google Error".
+        throw new Error(`TTS Google Error: ${grpcErrorDetail(e)}`, {
             cause: agentGenerationError("ttsGoogleAgent", audioAction, audioFileTarget),
         });
     }
 };
+const grpcErrorDetail = (e) => {
+    if (e instanceof Error) {
+        const details = e.details;
+        if (typeof details === "string" && details)
+            return details;
+        return e.message;
+    }
+    return String(e);
+};
 const ttsGoogleAgentInfo = {
     name: "ttsGoogleAgent",
     agent: ttsGoogleAgent,

package/lib/methods/mulmo_presentation_style.d.ts CHANGED Viewed

@@ -180,6 +180,7 @@ export declare const MulmoPresentationStyleMethods: {
                 referenceType: "ASSET" | "STYLE";
             }[] | undefined;
             concurrency?: number | undefined;
+            generateAudio?: boolean | undefined;
             speed?: number | undefined;
         };
         keyName: string;

package/lib/types/agent.d.ts CHANGED Viewed

@@ -84,6 +84,7 @@ export type GoogleMovieAgentParams = ImageAgentParams & {
     duration?: number;
     vertexai_project?: string;
     vertexai_location?: string;
+    generateAudio?: boolean;
 };
 export type ReplicateMovieAgentParams = {
     model: `${string}/${string}` | undefined;
@@ -92,6 +93,7 @@ export type ReplicateMovieAgentParams = {
         height: number;
     };
     duration?: number;
+    generateAudio?: boolean;
 };
 export type ReplicateSoundEffectAgentParams = {
     model: `${string}/${string}` | undefined;

package/lib/types/provider2agent.d.ts CHANGED Viewed

@@ -70,32 +70,47 @@ export declare const provider2ImageAgent: {
     };
 };
 export type ReplicateModel = `${string}/${string}`;
+export declare const AUDIO_MODE_NEVER: "never";
+export declare const AUDIO_MODE_ALWAYS: "always";
+export declare const AUDIO_MODE_OPTIONAL: "optional";
+type MovieAudioSpec = {
+    mode: typeof AUDIO_MODE_NEVER;
+} | {
+    mode: typeof AUDIO_MODE_ALWAYS;
+} | {
+    mode: typeof AUDIO_MODE_OPTIONAL;
+    param: string;
+};
+type ReplicateMovieModelParams = {
+    durations: number[];
+    start_image: string | undefined;
+    last_image?: string;
+    reference_images_param?: string;
+    audio: MovieAudioSpec;
+    price_per_sec: number;
+};
+type GoogleMovieModelParams = {
+    durations: number[];
+    supportsDuration: boolean;
+    supportsLastFrame: boolean;
+    supportsReferenceImages: boolean;
+    supportsPersonGeneration: boolean;
+    audio: MovieAudioSpec;
+};
 export declare const provider2MovieAgent: {
     replicate: {
         agentName: string;
         defaultModel: ReplicateModel;
         keyName: string;
         models: string[];
-        modelParams: Record<ReplicateModel, {
-            durations: number[];
-            start_image: string | undefined;
-            last_image?: string;
-            reference_images_param?: string;
-            price_per_sec: number;
-        }>;
+        modelParams: Record<ReplicateModel, ReplicateMovieModelParams>;
     };
     google: {
         agentName: string;
         defaultModel: string;
         models: string[];
         keyName: string;
-        modelParams: Record<string, {
-            durations: number[];
-            supportsDuration: boolean;
-            supportsLastFrame: boolean;
-            supportsReferenceImages: boolean;
-            supportsPersonGeneration: boolean;
-        }>;
+        modelParams: Record<string, GoogleMovieModelParams>;
     };
     mock: {
         agentName: string;
@@ -182,3 +197,4 @@ export declare const llm: (keyof typeof provider2LLMAgent)[];
 export type LLM = keyof typeof provider2LLMAgent;
 export declare const htmlLLMProvider: string[];
 export declare const getModelDuration: (provider: keyof typeof provider2MovieAgent, model: string, movieDuration?: number) => number | undefined;
+export {};

package/lib/types/provider2agent.js CHANGED Viewed

@@ -44,7 +44,7 @@ export const provider2TTSAgent = {
         models: ["mock-model"],
     },
 };
-export const gptImages = ["gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini"];
+export const gptImages = ["gpt-image-2", "gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini"];
 export const provider2ImageAgent = {
     openai: {
         agentName: "imageOpenaiAgent",
@@ -79,6 +79,9 @@ export const provider2ImageAgent = {
         keyName: "",
     },
 };
+export const AUDIO_MODE_NEVER = "never";
+export const AUDIO_MODE_ALWAYS = "always";
+export const AUDIO_MODE_OPTIONAL = "optional";
 export const provider2MovieAgent = {
     replicate: {
         agentName: "movieReplicateAgent",
@@ -87,6 +90,8 @@ export const provider2MovieAgent = {
         models: [
             "bytedance/seedance-1-lite",
             "bytedance/seedance-1-pro",
+            "bytedance/seedance-2.0",
+            "bytedance/seedance-2.0-fast",
             "kwaivgi/kling-v1.6-pro",
             "kwaivgi/kling-v2.1",
             "kwaivgi/kling-v2.1-master",
@@ -113,37 +118,58 @@ export const provider2MovieAgent = {
                 durations: [5, 10],
                 start_image: "image",
                 last_image: "last_frame_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.036, // in USD
             },
             "bytedance/seedance-1-pro": {
                 durations: [5, 10],
                 start_image: "image",
                 last_image: "last_frame_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.15,
             },
+            "bytedance/seedance-2.0": {
+                durations: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "image",
+                last_image: "last_frame_image",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
+                price_per_sec: 0.29,
+            },
+            "bytedance/seedance-2.0-fast": {
+                durations: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+                start_image: "image",
+                last_image: "last_frame_image",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
+                price_per_sec: 0.22,
+            },
             "kwaivgi/kling-v1.6-pro": {
                 durations: [5, 10],
                 start_image: "start_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.095,
             },
             "kwaivgi/kling-v2.1": {
                 durations: [5, 10],
                 start_image: "start_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.05,
             },
             "kwaivgi/kling-v2.1-master": {
                 durations: [5, 10],
                 start_image: "start_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.28,
             },
             "google/veo-2": {
                 durations: [5, 6, 7, 8],
                 start_image: "image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.5,
             },
             "google/veo-3": {
                 durations: [8],
                 start_image: "image",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
                 price_per_sec: 0.75,
             },
             "google/veo-3.1": {
@@ -151,71 +177,84 @@ export const provider2MovieAgent = {
                 start_image: "image",
                 last_image: "last_frame_image",
                 reference_images_param: "reference_images",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
                 price_per_sec: 0.75,
             },
             "google/veo-3.1-fast": {
                 durations: [4, 6, 8],
                 start_image: "image",
                 last_image: "last_frame_image",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
                 price_per_sec: 0.4,
             },
             "google/veo-3.1-lite": {
                 durations: [4, 6, 8],
                 start_image: "image",
                 last_image: "last_frame",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.05,
             },
             "google/veo-3-fast": {
                 durations: [8],
                 start_image: "image",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
                 price_per_sec: 0.4,
             },
             "minimax/video-01": {
                 durations: [6],
                 start_image: "first_frame_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.5,
             },
             "minimax/hailuo-02": {
                 durations: [6], // NOTE: 10 for only 720p
                 start_image: "first_frame_image",
                 last_image: "end_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.08,
             },
             "minimax/hailuo-02-fast": {
                 durations: [6, 10], // NOTE: 512P
                 start_image: "first_frame_image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.0166,
             },
             "pixverse/pixverse-v4.5": {
                 durations: [5, 8],
                 start_image: "image",
                 last_image: "last_frame_image",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "sound_effect_switch" },
                 price_per_sec: 0.12,
             },
             "wan-video/wan-2.2-i2v-fast": {
                 durations: [5],
                 start_image: "image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.012,
             },
             "wan-video/wan-2.2-t2v-fast": {
                 durations: [5],
                 start_image: undefined,
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.012,
             },
             "xai/grok-imagine-video": {
                 durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                 start_image: "image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.08,
             },
             "xai/grok-imagine-r2v": {
                 durations: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                 start_image: undefined,
                 reference_images_param: "reference_images",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.08,
             },
             "runwayml/gen-4.5": {
                 durations: [5, 10],
                 start_image: "image",
+                audio: { mode: AUDIO_MODE_NEVER },
                 price_per_sec: 0.25,
             },
             "kwaivgi/kling-v3-omni-video": {
@@ -223,6 +262,7 @@ export const provider2MovieAgent = {
                 start_image: "start_image",
                 last_image: "end_image",
                 reference_images_param: "reference_images",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
                 price_per_sec: 0.3,
             },
             "kwaivgi/kling-v3-video": {
@@ -230,6 +270,7 @@ export const provider2MovieAgent = {
                 start_image: "start_image",
                 last_image: "end_image",
                 reference_images_param: "reference_images",
+                audio: { mode: AUDIO_MODE_OPTIONAL, param: "generate_audio" },
                 price_per_sec: 0.3,
             },
         },
@@ -246,6 +287,7 @@ export const provider2MovieAgent = {
                 supportsLastFrame: true,
                 supportsReferenceImages: false,
                 supportsPersonGeneration: false,
+                audio: { mode: AUDIO_MODE_ALWAYS },
             },
             "veo-3.1-generate-preview": {
                 durations: [4, 6, 8],
@@ -253,6 +295,7 @@ export const provider2MovieAgent = {
                 supportsLastFrame: true,
                 supportsReferenceImages: true,
                 supportsPersonGeneration: false,
+                audio: { mode: AUDIO_MODE_ALWAYS },
             },
             "veo-3.0-generate-001": {
                 durations: [8],
@@ -260,6 +303,7 @@ export const provider2MovieAgent = {
                 supportsLastFrame: false,
                 supportsReferenceImages: false,
                 supportsPersonGeneration: false,
+                audio: { mode: AUDIO_MODE_ALWAYS },
             },
             "veo-2.0-generate-001": {
                 durations: [5, 6, 8],
@@ -267,6 +311,7 @@ export const provider2MovieAgent = {
                 supportsLastFrame: false, // Vertex AI only
                 supportsReferenceImages: false,
                 supportsPersonGeneration: true,
+                audio: { mode: AUDIO_MODE_NEVER },
             },
         },
     },

package/lib/types/schema.d.ts CHANGED Viewed

@@ -3691,6 +3691,7 @@ export declare const mulmoMovieParamsSchema: z.ZodObject<{
         }>;
     }, z.core.$strip>>>;
     concurrency: z.ZodOptional<z.ZodNumber>;
+    generateAudio: z.ZodOptional<z.ZodBoolean>;
 }, z.core.$strip>;
 export declare const mulmoBeatSchema: z.ZodObject<{
     speaker: z.ZodOptional<z.ZodString>;
@@ -6585,6 +6586,7 @@ export declare const mulmoBeatSchema: z.ZodObject<{
             }>;
         }, z.core.$strip>>>;
         concurrency: z.ZodOptional<z.ZodNumber>;
+        generateAudio: z.ZodOptional<z.ZodBoolean>;
         speed: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>>;
     soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -7074,6 +7076,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
             }>;
         }, z.core.$strip>>>;
         concurrency: z.ZodOptional<z.ZodNumber>;
+        generateAudio: z.ZodOptional<z.ZodBoolean>;
     }, z.core.$strip>>>;
     soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
         provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -7578,6 +7581,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
             }>;
         }, z.core.$strip>>>;
         concurrency: z.ZodOptional<z.ZodNumber>;
+        generateAudio: z.ZodOptional<z.ZodBoolean>;
     }, z.core.$strip>>>;
     soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
         provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -10611,6 +10615,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
                 }>;
             }, z.core.$strip>>>;
             concurrency: z.ZodOptional<z.ZodNumber>;
+            generateAudio: z.ZodOptional<z.ZodBoolean>;
             speed: z.ZodOptional<z.ZodNumber>;
         }, z.core.$strip>>;
         soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -11175,6 +11180,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
                 }>;
             }, z.core.$strip>>>;
             concurrency: z.ZodOptional<z.ZodNumber>;
+            generateAudio: z.ZodOptional<z.ZodBoolean>;
         }, z.core.$strip>>>;
         soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
             provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -14208,6 +14214,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
                     }>;
                 }, z.core.$strip>>>;
                 concurrency: z.ZodOptional<z.ZodNumber>;
+                generateAudio: z.ZodOptional<z.ZodBoolean>;
                 speed: z.ZodOptional<z.ZodNumber>;
             }, z.core.$strip>>;
             soundEffectParams: z.ZodOptional<z.ZodObject<{
@@ -14708,6 +14715,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
                 }>;
             }, z.core.$strip>>>;
             concurrency: z.ZodOptional<z.ZodNumber>;
+            generateAudio: z.ZodOptional<z.ZodBoolean>;
         }, z.core.$strip>>>;
         soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
             provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
@@ -15206,6 +15214,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
                 }>;
             }, z.core.$strip>>>;
             concurrency: z.ZodOptional<z.ZodNumber>;
+            generateAudio: z.ZodOptional<z.ZodBoolean>;
         }, z.core.$strip>>>;
         soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
             provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{

package/lib/types/schema.js CHANGED Viewed

@@ -499,6 +499,7 @@ export const mulmoMovieParamsSchema = z.object({
         .optional()
         .describe("Style/asset reference images (Veo 3.1). Mutually exclusive with imageName/lastFrameImageName"),
     concurrency: z.number().int().positive().optional().describe("Max concurrent movie generation requests"),
+    generateAudio: z.boolean().optional().describe("Request audio generation in the video (model-dependent)"),
 });
 export const mulmoBeatSchema = z
     .object({

package/lib/utils/context.d.ts CHANGED Viewed

@@ -282,6 +282,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
                 referenceType: "ASSET" | "STYLE";
             }[] | undefined;
             concurrency?: number | undefined;
+            generateAudio?: boolean | undefined;
         };
         soundEffectParams: {
             provider?: string | undefined;
@@ -2022,6 +2023,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
                     referenceType: "ASSET" | "STYLE";
                 }[] | undefined;
                 concurrency?: number | undefined;
+                generateAudio?: boolean | undefined;
                 speed?: number | undefined;
             } | undefined;
             soundEffectParams?: {
@@ -2518,6 +2520,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
                     referenceType: "ASSET" | "STYLE";
                 }[] | undefined;
                 concurrency?: number | undefined;
+                generateAudio?: boolean | undefined;
             };
             soundEffectParams: {
                 provider?: string | undefined;
@@ -4258,6 +4261,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
                         referenceType: "ASSET" | "STYLE";
                     }[] | undefined;
                     concurrency?: number | undefined;
+                    generateAudio?: boolean | undefined;
                     speed?: number | undefined;
                 } | undefined;
                 soundEffectParams?: {
@@ -4761,6 +4765,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
                 referenceType: "ASSET" | "STYLE";
             }[] | undefined;
             concurrency?: number | undefined;
+            generateAudio?: boolean | undefined;
         };
         soundEffectParams: {
             provider?: string | undefined;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mulmocast",
-  "version": "2.6.7",
+  "version": "2.6.9",
   "description": "",
   "type": "module",
   "main": "lib/index.node.js",
@@ -89,7 +89,7 @@
   "homepage": "https://github.com/receptron/mulmocast-cli#readme",
   "dependencies": {
     "@google-cloud/text-to-speech": "^6.4.0",
-    "@google/genai": "^1.49.0",
+    "@google/genai": "^1.50.1",
     "@graphai/anthropic_agent": "^2.0.12",
     "@graphai/browserless_agent": "^2.0.2",
     "@graphai/gemini_agent": "^2.0.5",
@@ -99,8 +99,8 @@
     "@graphai/stream_agent_filter": "^2.0.3",
     "@graphai/vanilla": "^2.0.12",
     "@graphai/vanilla_node_agents": "^2.0.4",
-    "@inquirer/input": "^5.0.11",
-    "@inquirer/select": "^5.1.3",
+    "@inquirer/input": "^5.0.12",
+    "@inquirer/select": "^5.1.4",
     "@modelcontextprotocol/sdk": "^1.29.0",
     "@mozilla/readability": "^0.6.0",
     "@tavily/core": "^0.5.11",
@@ -109,11 +109,11 @@
     "dotenv": "^17.4.2",
     "fluent-ffmpeg": "^2.1.3",
     "graphai": "^2.0.16",
-    "jsdom": "^29.0.2",
-    "marked": "^18.0.0",
+    "jsdom": "^29.1.0",
+    "marked": "^18.0.2",
     "mulmocast-vision": "^1.0.9",
-    "ora": "^9.3.0",
-    "puppeteer": "^24.40.0",
+    "ora": "^9.4.0",
+    "puppeteer": "^24.42.0",
     "replicate": "^1.4.0",
     "yaml": "^2.8.3",
     "yargs": "^18.0.0",
@@ -127,16 +127,16 @@
     "@types/jsdom": "^28.0.1",
     "@types/yargs": "^17.0.35",
     "cross-env": "^10.1.0",
-    "eslint": "^10.2.0",
+    "eslint": "^10.2.1",
     "eslint-config-prettier": "^10.1.8",
     "eslint-plugin-import": "^2.32.0",
     "eslint-plugin-prettier": "^5.5.5",
-    "eslint-plugin-sonarjs": "^4.0.2",
+    "eslint-plugin-sonarjs": "^4.0.3",
     "globals": "^17.5.0",
-    "prettier": "^3.8.2",
+    "prettier": "^3.8.3",
     "tsx": "^4.21.0",
-    "typescript": "6.0.2",
-    "typescript-eslint": "^8.58.1"
+    "typescript": "6.0.3",
+    "typescript-eslint": "^8.59.1"
   },
   "engines": {
     "node": ">=22.0.0"

package/scripts/test/README.md CHANGED Viewed

@@ -106,7 +106,7 @@ Language setting tests
 Provider-specific feature tests
 - [**test_hello_google.json**](./test_hello_google.json) - Google TTS専用テスト / Google TTS specific test
-- [**gpt.json**](./gpt.json) - GPTモデルテスト / GPT model test
+- [**test_gpt_image.json**](./test_gpt_image.json) - GPT image model test
 - [**mulmo_story.json**](./mulmo_story.json) - ストーリー形式テスト / Story format test
 - [**nano_banana.json**](./nano_banana.json) - カスタムサンプル / Custom sample

package/scripts/test/test_generate_audio.json ADDED Viewed

@@ -0,0 +1,33 @@
+{
+  "$mulmocast": { "version": "1.1" },
+  "title": "generateAudio Test",
+  "lang": "en",
+  "canvasSize": { "width": 1280, "height": 720 },
+  "audioParams": {
+    "bgmVolume": 0,
+    "suppressSpeech": true
+  },
+  "movieParams": {
+    "provider": "replicate",
+    "model": "kwaivgi/kling-v3-video",
+    "concurrency": 1
+  },
+  "beats": [
+    {
+      "text": "Audio ON (generateAudio: true)",
+      "moviePrompt": "A cat meowing and walking across a wooden floor, indoor scene",
+      "movieParams": {
+        "generateAudio": true
+      },
+      "duration": 3
+    },
+    {
+      "text": "Audio OFF (generateAudio: false)",
+      "moviePrompt": "A cat meowing and walking across a wooden floor, indoor scene",
+      "movieParams": {
+        "generateAudio": false
+      },
+      "duration": 3
+    }
+  ]
+}

package/scripts/test/{gpt.json → test_gpt_image.json} RENAMED Viewed

@@ -21,6 +21,14 @@
     }
   },
   "beats": [
+    {
+      "speaker": "Host",
+      "text": "How are you?",
+      "imagePrompt": "A witch in Harajuku",
+      "imageParams": {
+        "model": "gpt-image-2"
+      }
+    },
     {
       "speaker": "Host",
       "text": "How are you?",

package/scripts/test/test_seedance.json ADDED Viewed

@@ -0,0 +1,61 @@
+{
+  "$mulmocast": { "version": "1.1" },
+  "movieParams": {
+    "provider": "replicate",
+    "model": "bytedance/seedance-2.0"
+  },
+  "audioParams": {
+    "bgmVolume": 0
+  },
+  "captionParams": {
+    "lang": "en"
+  },
+  "lang": "en",
+  "beats": [
+    {
+      "text": "Comparing the bytedance seedance series with a fast-cut dance prompt",
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "PROMPT: high-energy dance montage, quick cuts synced to the beat, neon-lit urban street"
+        }
+      }
+    },
+    {
+      "id": "seedance-1-lite",
+      "text": "bytedance/seedance-1-lite",
+      "duration": 5,
+      "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
+      "movieParams": {
+        "model": "bytedance/seedance-1-lite"
+      }
+    },
+    {
+      "id": "seedance-1-pro",
+      "text": "bytedance/seedance-1-pro",
+      "duration": 5,
+      "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
+      "movieParams": {
+        "model": "bytedance/seedance-1-pro"
+      }
+    },
+    {
+      "id": "seedance-2.0",
+      "text": "bytedance/seedance-2.0",
+      "duration": 10,
+      "moviePrompt": "A high-energy 10-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
+      "movieParams": {
+        "model": "bytedance/seedance-2.0"
+      }
+    },
+    {
+      "id": "seedance-2.0-fast",
+      "text": "bytedance/seedance-2.0-fast",
+      "duration": 8,
+      "moviePrompt": "A high-energy 8-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
+      "movieParams": {
+        "model": "bytedance/seedance-2.0-fast"
+      }
+    }
+  ]
+}

package/scripts/test/test_seedance2_only.json ADDED Viewed

@@ -0,0 +1,43 @@
+{
+  "$mulmocast": { "version": "1.1" },
+  "movieParams": {
+    "provider": "replicate",
+    "model": "bytedance/seedance-2.0"
+  },
+  "audioParams": {
+    "bgmVolume": 0
+  },
+  "captionParams": {
+    "lang": "en"
+  },
+  "lang": "en",
+  "beats": [
+    {
+      "text": "Comparing the bytedance seedance series with a fast-cut dance prompt",
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "PROMPT: high-energy dance montage, quick cuts synced to the beat, neon-lit urban street"
+        }
+      }
+    },
+    {
+      "id": "seedance-2.0",
+      "text": "bytedance/seedance-2.0",
+      "duration": 5,
+      "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
+      "movieParams": {
+        "model": "bytedance/seedance-2.0"
+      }
+    },
+    {
+      "id": "seedance-2.0-fast",
+      "text": "bytedance/seedance-2.0-fast",
+      "duration": 5,
+      "moviePrompt": "A high-energy 5-second dance video, photorealistic, vibrant lighting. Fast-paced montage with quick cuts synchronized to the beat: energetic dancer performing sharp hip-hop moves in an urban street at night, neon lights reflecting on wet pavement, quick close-ups on footwork and hand gestures, dynamic full-body shots with camera orbiting smoothly, realistic body physics and fabric movement, original choreography, no copyrighted elements.",
+      "movieParams": {
+        "model": "bytedance/seedance-2.0-fast"
+      }
+    }
+  ]
+}