npm - mulmocast - Versions diffs - 0.0.14 → 0.0.16 - Mend

mulmocast 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/README.md +5 -1
package/assets/html/pdf_handout.html +85 -0
package/assets/html/pdf_slide.html +55 -0
package/assets/html/pdf_talk.html +76 -0
package/assets/templates/text_and_image.json +6 -0
package/assets/templates/text_only.json +6 -0
package/lib/actions/audio.d.ts +3 -1
package/lib/actions/audio.js +84 -45
package/lib/actions/captions.js +1 -1
package/lib/actions/images.d.ts +89 -1
package/lib/actions/images.js +160 -99
package/lib/actions/movie.js +28 -21
package/lib/actions/pdf.d.ts +1 -0
package/lib/actions/pdf.js +134 -204
package/lib/actions/translate.js +1 -1
package/lib/agents/add_bgm_agent.js +3 -3
package/lib/agents/combine_audio_files_agent.js +11 -9
package/lib/agents/image_mock_agent.d.ts +4 -0
package/lib/agents/image_mock_agent.js +18 -0
package/lib/agents/index.d.ts +4 -1
package/lib/agents/index.js +4 -1
package/lib/agents/media_mock_agent.d.ts +4 -0
package/lib/agents/media_mock_agent.js +18 -0
package/lib/agents/tavily_agent.d.ts +15 -0
package/lib/agents/tavily_agent.js +130 -0
package/lib/agents/tts_openai_agent.js +9 -1
package/lib/cli/commands/audio/builder.d.ts +4 -0
package/lib/cli/commands/image/builder.d.ts +4 -0
package/lib/cli/commands/movie/builder.d.ts +4 -0
package/lib/cli/commands/pdf/builder.d.ts +4 -0
package/lib/cli/commands/translate/builder.d.ts +4 -0
package/lib/cli/common.d.ts +4 -0
package/lib/cli/common.js +11 -0
package/lib/cli/helpers.d.ts +5 -1
package/lib/cli/helpers.js +19 -2
package/lib/methods/index.d.ts +1 -1
package/lib/methods/index.js +1 -1
package/lib/methods/mulmo_presentation_style.d.ts +14 -0
package/lib/methods/mulmo_presentation_style.js +70 -0
package/lib/methods/mulmo_script.d.ts +1 -1
package/lib/methods/mulmo_script.js +2 -2
package/lib/methods/mulmo_studio_context.d.ts +14 -0
package/lib/methods/mulmo_studio_context.js +20 -2
package/lib/tools/deep_research.d.ts +2 -0
package/lib/tools/deep_research.js +265 -0
package/lib/types/schema.d.ts +31 -0
package/lib/types/schema.js +1 -1
package/lib/types/type.d.ts +4 -1
package/lib/utils/ffmpeg_utils.d.ts +1 -0
package/lib/utils/ffmpeg_utils.js +10 -0
package/lib/utils/file.d.ts +1 -3
package/lib/utils/file.js +4 -11
package/lib/utils/filters.js +1 -0
package/lib/utils/markdown.js +1 -1
package/lib/utils/preprocess.js +1 -0
package/lib/utils/prompt.d.ts +3 -0
package/lib/utils/prompt.js +52 -0
package/package.json +10 -10
package/assets/font/NotoSansJP-Regular.ttf +0 -0
package/assets/music/StarsBeyondEx.mp3 +0 -0

package/README.md CHANGED Viewed

@@ -103,9 +103,13 @@ GOOGLE_PROJECT_ID=your_google_project_id
 See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
-#### (Optional) For Nijivoice's TTS model
+#### (Optional) For TTS models
 ```bash
+# For Nijivoice TTS
 NIJIVOICE_API_KEY=your_nijivoice_api_key
+# For ElevenLabs TTS
+ELEVENLABS_API_KEY=your_elevenlabs_api_key
 ```
 #### (Optional) to access web in mulmo tool

package/assets/html/pdf_handout.html ADDED Viewed

@@ -0,0 +1,85 @@
+<!DOCTYPE html>
+<html lang="${lang}">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>${title}</title>
+  <style>
+    @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
+    * {
+      margin: 0;
+      padding: 0;
+      box-sizing: border-box;
+    }
+    body {
+      font-family: 'Noto Sans JP', sans-serif;
+      font-size: 16px;
+      line-height: 1.6;
+      color: #333;
+      background: #fff;
+    }
+    @page {
+      size: ${page_size};
+      margin: 0;
+    }
+    .page {
+      page-break-after: always;
+      width: 100%;
+      height: 100vh;
+      position: relative;
+      overflow: hidden;
+      padding: 15px;
+      display: ${page_layout};
+      ${page_direction}
+      gap: 15px;
+      background: #fff;
+    }
+    .page:last-child {
+      page-break-after: avoid;
+    }
+    img {
+      max-width: 100%;
+      max-height: 100%;
+      object-fit: contain;
+    }
+    .handout-item {
+      display: flex;
+      flex-direction: ${flex_direction};
+      border: 1px solid #ddd;
+      overflow: hidden;
+      ${item_flex}
+    }
+    .handout-image {
+      ${image_size}
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      background: #f9f9f9;
+      padding: 5px;
+    }
+    .handout-text {
+      ${text_size}
+      padding: 8px;
+      font-size: 14px;
+      overflow: hidden;
+      background: #fff;
+    }
+    .handout-text p {
+      margin: 0.3em 0;
+    }
+  </style>
+</head>
+<body>
+  ${pages}
+</body>
+</html>

package/assets/html/pdf_slide.html ADDED Viewed

@@ -0,0 +1,55 @@
+<!DOCTYPE html>
+<html lang="${lang}">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>${title}</title>
+  <style>
+    @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
+    * {
+      margin: 0;
+      padding: 0;
+      box-sizing: border-box;
+    }
+    body {
+      font-family: 'Noto Sans JP', sans-serif;
+      font-size: 14px;
+      line-height: 1.6;
+      color: #333;
+      background: #fff;
+    }
+    @page {
+      size: ${page_size};
+      margin: 0;
+    }
+    .page {
+      page-break-after: always;
+      width: 100%;
+      height: 100vh;
+      position: relative;
+      overflow: hidden;
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      background: #fff;
+    }
+    .page:last-child {
+      page-break-after: avoid;
+    }
+    img {
+      max-width: 100%;
+      max-height: 100%;
+      object-fit: contain;
+    }
+  </style>
+</head>
+<body>
+  ${pages}
+</body>
+</html>

package/assets/html/pdf_talk.html ADDED Viewed

@@ -0,0 +1,76 @@
+<!DOCTYPE html>
+<html lang="${lang}">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>${title}</title>
+  <style>
+    @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
+    * {
+      margin: 0;
+      padding: 0;
+      box-sizing: border-box;
+    }
+    body {
+      font-family: 'Noto Sans JP', sans-serif;
+      font-size: 17px;
+      line-height: 1.4;
+      color: #333;
+      background: #fff;
+    }
+    @page {
+      size: ${page_size};
+      margin: 0;
+    }
+    .page {
+      page-break-after: always;
+      width: 100%;
+      height: 100vh;
+      position: relative;
+      overflow: hidden;
+      padding: 20px;
+      display: flex;
+      flex-direction: column;
+      background: #fff;
+    }
+    .page:last-child {
+      page-break-after: avoid;
+    }
+    img {
+      max-width: 100%;
+      max-height: 100%;
+      object-fit: contain;
+    }
+    .image-container {
+      flex: 1;
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      margin-bottom: 20px;
+      border: 1px solid #ddd;
+      background: #f9f9f9;
+    }
+    .text-container {
+      padding: 10px;
+      background: #fff;
+      border-top: 2px solid #333;
+      min-height: 120px;
+    }
+    .text-container p {
+      margin: 0.5em 0;
+    }
+  </style>
+</head>
+<body>
+  ${pages}
+</body>
+</html>

package/assets/templates/text_and_image.json ADDED Viewed

@@ -0,0 +1,6 @@
+{
+  "title": "Text and Image",
+  "description": "Template for Text and Image Script.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/text_only.json ADDED Viewed

@@ -0,0 +1,6 @@
+{
+  "title": "Text Only",
+  "description": "Template for Text Only Script.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "scriptName": "text_only_template.json"
+}

package/lib/actions/audio.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 import "dotenv/config";
 import type { CallbackFunction } from "graphai";
-import { MulmoStudioContext } from "../types/index.js";
+import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
+export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
 export declare const audioFilePath: (context: MulmoStudioContext) => string;
+export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
 export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;

package/lib/actions/audio.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import "dotenv/config";
 import { GraphAI } from "graphai";
+import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
 import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
 import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
 import ttsGoogleAgent from "../agents/tts_google_agent.js";
 import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-import { MulmoScriptMethods } from "../methods/index.js";
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
+import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -22,8 +23,9 @@ const provider_to_agent = {
     openai: "ttsOpenaiAgent",
     google: "ttsGoogleAgent",
     elevenlabs: "ttsElevenlabsAgent",
+    mock: "mediaMockAgent",
 };
-const getAudioPath = (context, beat, audioFile, audioDirPath) => {
+const getAudioPath = (context, beat, audioFile) => {
     if (beat.audio?.type === "audio") {
         const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
         if (path) {
@@ -31,37 +33,51 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
         }
         throw new Error("Invalid audio source");
     }
-    if (beat.text === "") {
+    if (beat.text === undefined || beat.text === "") {
         return undefined; // It indicates that the audio is not needed.
     }
-    return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
+    return audioFile;
+};
+const getAudioParam = (presentationStyle, beat) => {
+    const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
+    // Use speaker-specific provider if available, otherwise fall back to script-level provider
+    const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+    const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
+    return { voiceId, provider, speechOptions };
+};
+export const getBeatAudioPath = (text, context, beat, lang) => {
+    const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+    const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
+    const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+    const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
+    const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
+    return getAudioPath(context, beat, audioFile);
 };
 const preprocessor = (namedInputs) => {
-    const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
-    const { lang } = context;
-    const speaker = context.studio.script.speechParams.speakers[beat.speaker];
-    const voiceId = speaker.voiceId;
-    const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
+    const { beat, studioBeat, multiLingual, context } = namedInputs;
+    const { lang, presentationStyle } = context;
     const text = localizedText(beat, multiLingual, lang);
-    // Use speaker-specific provider if available, otherwise fall back to script-level provider
-    const provider = speaker.provider ?? context.studio.script.speechParams.provider;
-    const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
-    const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
-    const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
+    const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+    const audioPath = getBeatAudioPath(text, context, beat, lang);
     studioBeat.audioFile = audioPath;
     const needsTTS = !beat.audio && audioPath !== undefined;
     return {
         ttsAgent: provider_to_agent[provider],
-        studioBeat,
+        text,
         voiceId,
         speechOptions,
         audioPath,
-        text,
+        studioBeat,
         needsTTS,
     };
 };
 const graph_tts = {
     nodes: {
+        beat: {},
+        studioBeat: {},
+        multiLingual: {},
+        context: {},
+        __mapIndex: {},
         preprocessor: {
             agent: preprocessor,
             inputs: {
@@ -69,7 +85,6 @@ const graph_tts = {
                 studioBeat: ":studioBeat",
                 multiLingual: ":multiLingual",
                 context: ":context",
-                audioDirPath: ":audioDirPath",
             },
         },
         tts: {
@@ -99,8 +114,6 @@ const graph_data = {
         audioArtifactFilePath: {},
         audioCombinedFilePath: {},
         outputStudioFilePath: {},
-        audioDirPath: {},
-        audioSegmentDirPath: {},
         musicFile: {},
         map: {
             agent: "mapAgent",
@@ -108,8 +121,6 @@ const graph_data = {
                 rows: ":context.studio.script.beats",
                 studioBeat: ":context.studio.beats",
                 multiLingual: ":context.studio.multiLingual",
-                audioDirPath: ":audioDirPath",
-                audioSegmentDirPath: ":audioSegmentDirPath",
                 context: ":context",
             },
             params: {
@@ -121,7 +132,7 @@ const graph_data = {
         combineFiles: {
             agent: "combineAudioFilesAgent",
             inputs: {
-                map: ":map",
+                onComplete: ":map",
                 context: ":context",
                 combinedFileName: ":audioCombinedFilePath",
             },
@@ -140,7 +151,7 @@ const graph_data = {
                 wait: ":combineFiles",
                 voiceFile: ":audioCombinedFilePath",
                 outputFile: ":audioArtifactFilePath",
-                script: ":context.studio.script",
+                context: ":context",
                 params: {
                     musicFile: ":musicFile",
                 },
@@ -171,40 +182,68 @@ export const audioFilePath = (context) => {
     const { outDirPath } = fileDirs;
     return getAudioArtifactFilePath(outDirPath, studio.filename);
 };
+const getConcurrency = (context) => {
+    // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+    const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
+        const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
+        return provider === "nijivoice" || provider === "elevenlabs";
+    });
+    return hasLimitedConcurrencyProvider ? 1 : 8;
+};
+const audioAgents = {
+    ...vanillaAgents,
+    fileWriteAgent,
+    ttsOpenaiAgent,
+    ttsNijivoiceAgent,
+    ttsGoogleAgent,
+    ttsElevenlabsAgent,
+    addBGMAgent,
+    combineAudioFilesAgent,
+};
+export const generateBeatAudio = async (index, context, callbacks) => {
+    try {
+        MulmoStudioContextMethods.setSessionState(context, "audio", true);
+        const { studio, fileDirs } = context;
+        const { outDirPath, audioDirPath } = fileDirs;
+        const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+        mkdir(outDirPath);
+        mkdir(audioSegmentDirPath);
+        const taskManager = new TaskManager(getConcurrency(context));
+        const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+        graph.injectValue("__mapIndex", index);
+        graph.injectValue("beat", context.studio.script.beats[index]);
+        graph.injectValue("studioBeat", context.studio.beats[index]);
+        graph.injectValue("multiLingual", context.studio.multiLingual);
+        graph.injectValue("context", context);
+        if (callbacks) {
+            callbacks.forEach((callback) => {
+                graph.registerCallback(callback);
+            });
+        }
+        await graph.run();
+    }
+    finally {
+        MulmoStudioContextMethods.setSessionState(context, "audio", false);
+    }
+};
 export const audio = async (context, callbacks) => {
     try {
         MulmoStudioContextMethods.setSessionState(context, "audio", true);
         const { studio, fileDirs, lang } = context;
         const { outDirPath, audioDirPath } = fileDirs;
         const audioArtifactFilePath = audioFilePath(context);
-        const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
-        const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
+        const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+        const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
         const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
         mkdir(outDirPath);
         mkdir(audioSegmentDirPath);
-        // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
-        const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
-            const provider = speaker.provider ?? studio.script.speechParams.provider;
-            return provider === "nijivoice" || provider === "elevenlabs";
-        });
-        graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
-        const graph = new GraphAI(graph_data, {
-            ...vanillaAgents,
-            fileWriteAgent,
-            ttsOpenaiAgent,
-            ttsNijivoiceAgent,
-            ttsGoogleAgent,
-            ttsElevenlabsAgent,
-            addBGMAgent,
-            combineAudioFilesAgent,
-        }, { agentFilters });
+        const taskManager = new TaskManager(getConcurrency(context));
+        const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
         graph.injectValue("context", context);
         graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
         graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
         graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-        graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
-        graph.injectValue("audioDirPath", audioDirPath);
-        graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
+        graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
         if (callbacks) {
             callbacks.forEach((callback) => {
                 graph.registerCallback(callback);

package/lib/actions/captions.js CHANGED Viewed

@@ -26,7 +26,7 @@ const graph_data = {
                                 const { fileDirs } = namedInputs.context;
                                 const { caption } = context;
                                 const { imageDirPath } = fileDirs;
-                                const { canvasSize } = context.studio.script;
+                                const { canvasSize } = context.presentationStyle;
                                 const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
                                 const template = getHTMLFile("caption");
                                 const text = (() => {

package/lib/actions/images.d.ts CHANGED Viewed

@@ -1,3 +1,91 @@
 import type { CallbackFunction } from "graphai";
-import { MulmoStudioContext } from "../types/index.js";
+import { MulmoStudioContext, MulmoBeat, Text2ImageAgentInfo } from "../types/index.js";
+export declare const imagePreprocessAgent: (namedInputs: {
+    context: MulmoStudioContext;
+    beat: MulmoBeat;
+    index: number;
+    suffix: string;
+    imageDirPath: string;
+    imageAgentInfo: Text2ImageAgentInfo;
+    imageRefs: Record<string, string>;
+}) => Promise<{
+    imageParams: {
+        model?: string | undefined;
+        style?: string | undefined;
+        moderation?: string | undefined;
+        images?: Record<string, {
+            type: "image";
+            source: {
+                url: string;
+                kind: "url";
+            } | {
+                kind: "base64";
+                data: string;
+            } | {
+                text: string;
+                kind: "text";
+            } | {
+                path: string;
+                kind: "path";
+            };
+        }> | undefined;
+    };
+    movieFile: string | undefined;
+    imagePath: string | undefined;
+    referenceImage: string | undefined;
+} | {
+    imagePath: string;
+    images: string[];
+    imageFromMovie: boolean;
+    imageParams: {
+        model?: string | undefined;
+        style?: string | undefined;
+        moderation?: string | undefined;
+        images?: Record<string, {
+            type: "image";
+            source: {
+                url: string;
+                kind: "url";
+            } | {
+                kind: "base64";
+                data: string;
+            } | {
+                text: string;
+                kind: "text";
+            } | {
+                path: string;
+                kind: "path";
+            };
+        }> | undefined;
+    };
+    movieFile: string | undefined;
+} | {
+    images: string[];
+    imageParams: {
+        model?: string | undefined;
+        style?: string | undefined;
+        moderation?: string | undefined;
+        images?: Record<string, {
+            type: "image";
+            source: {
+                url: string;
+                kind: "url";
+            } | {
+                kind: "base64";
+                data: string;
+            } | {
+                text: string;
+                kind: "text";
+            } | {
+                path: string;
+                kind: "path";
+            };
+        }> | undefined;
+    };
+    movieFile: string | undefined;
+    imagePath: string;
+    referenceImage: string;
+    prompt: string;
+}>;
 export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;