mulmocast 0.1.7 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/assets/templates/akira_comic.json +1 -1
  2. package/assets/templates/ani.json +3 -3
  3. package/assets/templates/ani_ja.json +4 -5
  4. package/assets/templates/characters.json +1 -1
  5. package/assets/templates/children_book.json +1 -1
  6. package/assets/templates/comic_strips.json +1 -1
  7. package/assets/templates/drslump_comic.json +1 -1
  8. package/assets/templates/ghibli_comic.json +1 -1
  9. package/assets/templates/ghibli_image_only.json +1 -1
  10. package/assets/templates/ghibli_shorts.json +2 -3
  11. package/assets/templates/ghost_comic.json +1 -1
  12. package/assets/templates/onepiece_comic.json +1 -1
  13. package/assets/templates/portrait_movie.json +1 -1
  14. package/assets/templates/realistic_movie.json +1 -1
  15. package/assets/templates/sensei_and_taro.json +4 -5
  16. package/assets/templates/shorts.json +1 -1
  17. package/assets/templates/trailer.json +1 -1
  18. package/lib/actions/audio.js +6 -7
  19. package/lib/actions/image_agents.d.ts +46 -76
  20. package/lib/actions/image_agents.js +18 -3
  21. package/lib/actions/images.js +65 -4
  22. package/lib/actions/movie.js +3 -2
  23. package/lib/agents/index.d.ts +3 -1
  24. package/lib/agents/index.js +3 -1
  25. package/lib/agents/lipsync_replicate_agent.d.ts +5 -0
  26. package/lib/agents/lipsync_replicate_agent.js +57 -0
  27. package/lib/agents/movie_replicate_agent.js +17 -5
  28. package/lib/agents/sound_effect_replicate_agent.d.ts +5 -0
  29. package/lib/agents/sound_effect_replicate_agent.js +59 -0
  30. package/lib/data/index.d.ts +2 -0
  31. package/lib/data/index.js +2 -0
  32. package/lib/data/promptTemplates.d.ts +695 -0
  33. package/lib/data/promptTemplates.js +957 -0
  34. package/lib/data/scriptTemplates.d.ts +233 -0
  35. package/lib/data/scriptTemplates.js +580 -0
  36. package/lib/index.browser.d.ts +2 -1
  37. package/lib/index.browser.js +2 -1
  38. package/lib/mcp/server.js +2 -2
  39. package/lib/methods/index.d.ts +1 -0
  40. package/lib/methods/index.js +1 -0
  41. package/lib/methods/mulmo_presentation_style.d.ts +18 -5
  42. package/lib/methods/mulmo_presentation_style.js +31 -20
  43. package/lib/methods/mulmo_script.d.ts +4 -0
  44. package/lib/methods/mulmo_script.js +31 -0
  45. package/lib/tools/story_to_script.js +2 -2
  46. package/lib/types/agent.d.ts +19 -0
  47. package/lib/types/schema.d.ts +628 -246
  48. package/lib/types/schema.js +31 -12
  49. package/lib/types/type.d.ts +2 -3
  50. package/lib/utils/assets.d.ts +18 -0
  51. package/lib/utils/assets.js +101 -0
  52. package/lib/utils/context.d.ts +40 -12
  53. package/lib/utils/context.js +3 -1
  54. package/lib/utils/file.d.ts +12 -4
  55. package/lib/utils/file.js +48 -24
  56. package/lib/utils/preprocess.d.ts +30 -11
  57. package/lib/utils/preprocess.js +7 -5
  58. package/lib/utils/provider2agent.d.ts +30 -1
  59. package/lib/utils/provider2agent.js +86 -0
  60. package/lib/utils/utils.js +6 -0
  61. package/package.json +8 -4
  62. package/scripts/templates/business.json +1 -1
  63. package/scripts/templates/children_book.json +1 -1
  64. package/scripts/templates/coding.json +1 -1
  65. package/scripts/templates/html.json +1 -1
  66. package/scripts/templates/image_prompt_only_template.json +1 -1
  67. package/scripts/templates/image_prompts_template.json +1 -1
  68. package/scripts/templates/image_refs.json +1 -1
  69. package/scripts/templates/movie_prompts_no_text_template.json +1 -1
  70. package/scripts/templates/movie_prompts_template.json +1 -1
  71. package/scripts/templates/presentation.json +1 -1
  72. package/scripts/templates/sensei_and_taro.json +1 -1
  73. package/scripts/templates/shorts_template.json +1 -1
  74. package/scripts/templates/text_only_template.json +1 -1
  75. package/scripts/templates/voice_over.json +1 -1
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -1,10 +1,10 @@
1
1
  {
2
- "title": "Presentation with Ani in Japanese",
3
- "description": "Template for presentation with Ani in Japanese.",
2
+ "title": "Presentation with Ani",
3
+ "description": "Template for presentation with Ani.",
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "movieParams": {
@@ -1,10 +1,10 @@
1
1
  {
2
- "title": "Presentation with Ani",
3
- "description": "Template for presentation with Ani.",
2
+ "title": "Presentation with Ani in Japanese",
3
+ "description": "Template for presentation with Ani in Japanese.",
4
4
  "systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "movieParams": {
@@ -23,9 +23,8 @@
23
23
  "height": 1536
24
24
  },
25
25
  "speechParams": {
26
- "provider": "nijivoice",
27
26
  "speakers": {
28
- "Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
27
+ "Presenter": { "provider": "nijivoice", "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
29
28
  }
30
29
  },
31
30
  "imageParams": {
@@ -3,7 +3,7 @@
3
3
  "description": "Template for story with multiple characters.",
4
4
  "presentationStyle": {
5
5
  "$mulmocast": {
6
- "version": "1.0",
6
+ "version": "1.1",
7
7
  "credit": "closing"
8
8
  },
9
9
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must have an image prompt appropriate for the text.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -12,9 +12,8 @@
12
12
  "height": 1536
13
13
  },
14
14
  "speechParams": {
15
- "provider": "nijivoice",
16
15
  "speakers": {
17
- "Presenter": { "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c", "speechOptions": { "speed": 1.5 } }
16
+ "Presenter": { "provider": "nijivoice", "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c", "speechOptions": { "speed": 1.5 } }
18
17
  }
19
18
  },
20
19
  "imageParams": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -15,11 +15,10 @@
15
15
  "style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
16
16
  },
17
17
  "speechParams": {
18
- "provider": "nijivoice",
19
18
  "speakers": {
20
- "Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c" },
21
- "Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
22
- "Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
19
+ "Announcer": { "provider": "nijivoice", "displayName": { "ja": "アナウンサー" }, "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c" },
20
+ "Student": { "provider": "nijivoice", "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
21
+ "Teacher": { "provider": "nijivoice", "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
23
22
  }
24
23
  }
25
24
  },
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0"
7
+ "version": "1.1"
8
8
  },
9
9
  "canvasSize": {
10
10
  "width": 720,
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0"
7
+ "version": "1.1"
8
8
  },
9
9
  "canvasSize": {
10
10
  "width": 1280,
@@ -9,6 +9,7 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
9
9
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
+ import { text2SpeechProviderSchema, } from "../types/index.js";
12
13
  import { fileCacheAgentFilter } from "../utils/filters.js";
13
14
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
14
15
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
@@ -30,12 +31,10 @@ const getAudioPath = (context, beat, audioFile) => {
30
31
  return audioFile;
31
32
  };
32
33
  const getAudioParam = (presentationStyle, beat) => {
33
- const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
34
- // Use speaker-specific provider if available, otherwise fall back to script-level provider
35
- const provider = MulmoPresentationStyleMethods.getTTSProvider(presentationStyle, beat);
36
- const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
37
- const model = MulmoPresentationStyleMethods.getTTSModel(presentationStyle, beat);
38
- return { voiceId, provider, speechOptions, model };
34
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
35
+ const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
36
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
37
+ return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
39
38
  };
40
39
  export const getBeatAudioPath = (text, context, beat, lang) => {
41
40
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
@@ -183,7 +182,7 @@ export const audioFilePath = (context) => {
183
182
  const getConcurrency = (context) => {
184
183
  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
185
184
  const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
186
- const provider = (speaker.provider ?? context.presentationStyle.speechParams.provider);
185
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
187
186
  return provider2TTSAgent[provider].hasLimitedConcurrency;
188
187
  });
189
188
  return hasLimitedConcurrencyProvider ? 1 : 8;
@@ -1,4 +1,4 @@
1
- import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension } from "../types/index.js";
1
+ import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension, MulmoImageParams } from "../types/index.js";
2
2
  export declare const imagePreprocessAgent: (namedInputs: {
3
3
  context: MulmoStudioContext;
4
4
  beat: MulmoBeat;
@@ -12,32 +12,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
12
12
  } | {
13
13
  imagePath: string | undefined;
14
14
  referenceImageForMovie: string | undefined;
15
- imageParams: {
16
- provider: string;
17
- model?: string | undefined;
18
- style?: string | undefined;
19
- moderation?: string | undefined;
20
- images?: Record<string, {
21
- type: "image";
22
- source: {
23
- url: string;
24
- kind: "url";
25
- } | {
26
- kind: "base64";
27
- data: string;
28
- } | {
29
- text: string;
30
- kind: "text";
31
- } | {
32
- path: string;
33
- kind: "path";
34
- };
35
- } | {
36
- type: "imagePrompt";
37
- prompt: string;
38
- }> | undefined;
39
- };
15
+ imageParams: MulmoImageParams;
40
16
  movieFile: string | undefined;
17
+ soundEffectFile?: string;
18
+ soundEffectPrompt?: string;
19
+ soundEffectModel?: string;
20
+ soundEffectAgentInfo?: {
21
+ agentName: string;
22
+ defaultModel: string;
23
+ };
24
+ lipSyncFile?: string;
25
+ lipSyncModel?: string;
26
+ lipSyncAgentInfo?: {
27
+ agentName: string;
28
+ defaultModel: string;
29
+ };
30
+ audioFile?: string;
41
31
  htmlPrompt?: undefined;
42
32
  htmlPath?: undefined;
43
33
  htmlImageSystemPrompt?: undefined;
@@ -59,32 +49,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
59
49
  } | undefined;
60
50
  };
61
51
  };
62
- imageParams: {
63
- provider: string;
64
- model?: string | undefined;
65
- style?: string | undefined;
66
- moderation?: string | undefined;
67
- images?: Record<string, {
68
- type: "image";
69
- source: {
70
- url: string;
71
- kind: "url";
72
- } | {
73
- kind: "base64";
74
- data: string;
75
- } | {
76
- text: string;
77
- kind: "text";
78
- } | {
79
- path: string;
80
- kind: "path";
81
- };
82
- } | {
83
- type: "imagePrompt";
84
- prompt: string;
85
- }> | undefined;
86
- };
52
+ imageParams: MulmoImageParams;
87
53
  movieFile: string | undefined;
54
+ soundEffectFile?: string;
55
+ soundEffectPrompt?: string;
56
+ soundEffectModel?: string;
57
+ soundEffectAgentInfo?: {
58
+ agentName: string;
59
+ defaultModel: string;
60
+ };
61
+ lipSyncFile?: string;
62
+ lipSyncModel?: string;
63
+ lipSyncAgentInfo?: {
64
+ agentName: string;
65
+ defaultModel: string;
66
+ };
67
+ audioFile?: string;
88
68
  htmlPrompt?: undefined;
89
69
  htmlPath?: undefined;
90
70
  htmlImageSystemPrompt?: undefined;
@@ -109,32 +89,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
109
89
  } | undefined;
110
90
  };
111
91
  };
112
- imageParams: {
113
- provider: string;
114
- model?: string | undefined;
115
- style?: string | undefined;
116
- moderation?: string | undefined;
117
- images?: Record<string, {
118
- type: "image";
119
- source: {
120
- url: string;
121
- kind: "url";
122
- } | {
123
- kind: "base64";
124
- data: string;
125
- } | {
126
- text: string;
127
- kind: "text";
128
- } | {
129
- path: string;
130
- kind: "path";
131
- };
132
- } | {
133
- type: "imagePrompt";
134
- prompt: string;
135
- }> | undefined;
136
- };
92
+ imageParams: MulmoImageParams;
137
93
  movieFile: string | undefined;
94
+ soundEffectFile?: string;
95
+ soundEffectPrompt?: string;
96
+ soundEffectModel?: string;
97
+ soundEffectAgentInfo?: {
98
+ agentName: string;
99
+ defaultModel: string;
100
+ };
101
+ lipSyncFile?: string;
102
+ lipSyncModel?: string;
103
+ lipSyncAgentInfo?: {
104
+ agentName: string;
105
+ defaultModel: string;
106
+ };
107
+ audioFile?: string;
138
108
  htmlPrompt?: undefined;
139
109
  htmlPath?: undefined;
140
110
  htmlImageSystemPrompt?: undefined;
@@ -1,5 +1,5 @@
1
1
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods } from "../methods/index.js";
2
- import { getBeatPngImagePath, getBeatMoviePath } from "../utils/file.js";
2
+ import { getBeatPngImagePath, getBeatMoviePaths } from "../utils/file.js";
3
3
  import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
4
4
  import { renderHTMLToImage } from "../utils/markdown.js";
5
5
  import { GraphAILogger } from "graphai";
@@ -18,10 +18,25 @@ export const imagePreprocessAgent = async (namedInputs) => {
18
18
  return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
19
19
  }
20
20
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
21
+ const moviePaths = getBeatMoviePaths(context, index);
21
22
  const returnValue = {
22
23
  imageParams: imageAgentInfo.imageParams,
23
- movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
24
+ movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
24
25
  };
26
+ if (beat.soundEffectPrompt) {
27
+ returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
28
+ returnValue.soundEffectModel =
29
+ beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
30
+ returnValue.soundEffectFile = moviePaths.soundEffectFile;
31
+ returnValue.soundEffectPrompt = beat.soundEffectPrompt;
32
+ }
33
+ if (beat.enableLipSync) {
34
+ returnValue.lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
35
+ returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? returnValue.lipSyncAgentInfo.defaultModel;
36
+ returnValue.lipSyncFile = moviePaths.lipSyncFile;
37
+ // Audio file will be set from the beat's audio file when available
38
+ returnValue.audioFile = context.studio.beats[index]?.audioFile;
39
+ }
25
40
  if (beat.image) {
26
41
  const plugin = MulmoBeatMethods.getPlugin(beat);
27
42
  const pluginPath = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
@@ -29,7 +44,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
29
44
  return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
30
45
  }
31
46
  const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
32
- GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
47
+ GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, returnValue.soundEffectAgentInfo, "\n", beat.moviePrompt, beat.soundEffectPrompt);
33
48
  if (beat.moviePrompt && !beat.imagePrompt) {
34
49
  return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
35
50
  }
@@ -6,7 +6,7 @@ import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
7
7
  import { anthropicAgent } from "@graphai/anthropic_agent";
8
8
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
9
- import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
9
+ import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, } from "../agents/index.js";
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
12
12
  import { fileCacheAgentFilter } from "../utils/filters.js";
@@ -23,10 +23,18 @@ const movieAgents = {
23
23
  movieGoogleAgent,
24
24
  movieReplicateAgent,
25
25
  };
26
+ const soundEffectAgents = {
27
+ soundEffectReplicateAgent,
28
+ };
29
+ const lipSyncAgents = {
30
+ lipSyncReplicateAgent,
31
+ };
26
32
  const defaultAgents = {
27
33
  ...vanillaAgents,
28
34
  ...imageAgents,
29
35
  ...movieAgents,
36
+ ...soundEffectAgents,
37
+ ...lipSyncAgents,
30
38
  mediaMockAgent,
31
39
  fileWriteAgent,
32
40
  openAIAgent,
@@ -167,6 +175,10 @@ const beat_graph_data = {
167
175
  },
168
176
  audioChecker: {
169
177
  agent: async (namedInputs) => {
178
+ if (namedInputs.soundEffectFile) {
179
+ // NOTE: We intentionally don't check lipSyncFile here.
180
+ return { hasMovieAudio: true };
181
+ }
170
182
  const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
171
183
  if (!sourceFile) {
172
184
  return { hasMovieAudio: false };
@@ -175,22 +187,71 @@ const beat_graph_data = {
175
187
  return { hasMovieAudio: hasAudio };
176
188
  },
177
189
  inputs: {
178
- onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
190
+ onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"], // to wait for movieGenerator, htmlImageGenerator, soundEffectGenerator, and lipSyncGenerator to finish
179
191
  movieFile: ":preprocessor.movieFile",
180
192
  imageFile: ":preprocessor.imagePath",
193
+ soundEffectFile: ":preprocessor.soundEffectFile",
194
+ },
195
+ },
196
+ soundEffectGenerator: {
197
+ if: ":preprocessor.soundEffectPrompt",
198
+ agent: ":preprocessor.soundEffectAgentInfo.agentName",
199
+ inputs: {
200
+ onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
201
+ prompt: ":preprocessor.soundEffectPrompt",
202
+ movieFile: ":preprocessor.movieFile",
203
+ soundEffectFile: ":preprocessor.soundEffectFile",
204
+ params: {
205
+ model: ":preprocessor.soundEffectModel",
206
+ duration: ":beat.duration",
207
+ },
208
+ cache: {
209
+ force: [":context.force"],
210
+ file: ":preprocessor.soundEffectFile",
211
+ index: ":__mapIndex",
212
+ sessionType: "soundEffect",
213
+ mulmoContext: ":context",
214
+ },
215
+ },
216
+ defaultValue: {},
217
+ },
218
+ lipSyncGenerator: {
219
+ if: ":beat.enableLipSync",
220
+ agent: ":preprocessor.lipSyncAgentInfo.agentName",
221
+ inputs: {
222
+ onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
223
+ movieFile: ":preprocessor.movieFile",
224
+ audioFile: ":preprocessor.audioFile",
225
+ lipSyncFile: ":preprocessor.lipSyncFile",
226
+ params: {
227
+ model: ":preprocessor.lipSyncModel",
228
+ duration: ":beat.duration",
229
+ },
230
+ cache: {
231
+ force: [":context.force"],
232
+ file: ":preprocessor.lipSyncFile",
233
+ index: ":__mapIndex",
234
+ sessionType: "lipSync",
235
+ mulmoContext: ":context",
236
+ },
181
237
  },
238
+ defaultValue: {},
182
239
  },
183
240
  output: {
184
241
  agent: "copyAgent",
185
242
  inputs: {
186
- onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
243
+ onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator", ":lipSyncGenerator"], // to wait for imageFromMovie, soundEffectGenerator, and lipSyncGenerator to finish
187
244
  imageFile: ":preprocessor.imagePath",
188
245
  movieFile: ":preprocessor.movieFile",
246
+ soundEffectFile: ":preprocessor.soundEffectFile",
247
+ lipSyncFile: ":preprocessor.lipSyncFile",
189
248
  hasMovieAudio: ":audioChecker.hasMovieAudio",
190
249
  },
191
250
  output: {
192
251
  imageFile: ".imageFile",
193
252
  movieFile: ".movieFile",
253
+ soundEffectFile: ".soundEffectFile",
254
+ lipSyncFile: ".lipSyncFile",
194
255
  hasMovieAudio: ".hasMovieAudio",
195
256
  },
196
257
  isResult: true,
@@ -284,7 +345,7 @@ export const graphOption = async (context, settings) => {
284
345
  {
285
346
  name: "fileCacheAgentFilter",
286
347
  agent: fileCacheAgentFilter,
287
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
348
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator"],
288
349
  },
289
350
  ],
290
351
  taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
@@ -162,7 +162,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
162
162
  beatTimestamps.push(timestamp);
163
163
  return timestamp; // Skip voice-over beats.
164
164
  }
165
- const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
165
+ const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
166
166
  assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
167
167
  assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
168
168
  const extraPadding = (() => {
@@ -183,7 +183,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
183
183
  const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
184
184
  const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
185
185
  const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
186
- const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
186
+ const mediaType = studioBeat.lipSyncFile || studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
187
187
  const speed = beat.movieParams?.speed ?? 1.0;
188
188
  const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
189
189
  ffmpegContext.filterComplex.push(videoPart);
@@ -206,6 +206,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
206
206
  // NOTE: We don't support audio if the speed is not 1.0.
207
207
  const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
208
208
  if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
209
+ // TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
209
210
  const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
210
211
  audioIdsFromMovieBeats.push(audioId);
211
212
  ffmpegContext.filterComplex.push(audioPart);
@@ -10,8 +10,10 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
10
10
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
11
11
  import ttsOpenaiAgent from "./tts_openai_agent.js";
12
12
  import validateSchemaAgent from "./validate_schema_agent.js";
13
+ import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
14
+ import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
13
15
  import { browserlessAgent } from "@graphai/browserless_agent";
14
16
  import { textInputAgent } from "@graphai/input_agents";
15
17
  import { openAIAgent } from "@graphai/openai_agent";
16
18
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
17
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
19
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
@@ -10,9 +10,11 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
10
10
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
11
11
  import ttsOpenaiAgent from "./tts_openai_agent.js";
12
12
  import validateSchemaAgent from "./validate_schema_agent.js";
13
+ import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
14
+ import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
13
15
  import { browserlessAgent } from "@graphai/browserless_agent";
14
16
  import { textInputAgent } from "@graphai/input_agents";
15
17
  import { openAIAgent } from "@graphai/openai_agent";
16
18
  // import * as vanilla from "@graphai/vanilla";
17
19
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
18
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
20
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
@@ -0,0 +1,5 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { AgentBufferResult, LipSyncAgentInputs, ReplicateLipSyncAgentParams, ReplicateLipSyncAgentConfig } from "../types/agent.js";
3
+ export declare const lipSyncReplicateAgent: AgentFunction<ReplicateLipSyncAgentParams, AgentBufferResult, LipSyncAgentInputs, ReplicateLipSyncAgentConfig>;
4
+ declare const lipSyncReplicateAgentInfo: AgentFunctionInfo;
5
+ export default lipSyncReplicateAgentInfo;