mulmocast 0.0.15 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/assets/templates/text_and_image.json +6 -0
  2. package/assets/templates/text_only.json +6 -0
  3. package/lib/actions/audio.d.ts +4 -2
  4. package/lib/actions/audio.js +89 -48
  5. package/lib/actions/captions.d.ts +1 -1
  6. package/lib/actions/captions.js +17 -14
  7. package/lib/actions/images.d.ts +6 -3
  8. package/lib/actions/images.js +64 -39
  9. package/lib/actions/movie.js +19 -19
  10. package/lib/actions/pdf.js +3 -4
  11. package/lib/actions/translate.js +11 -11
  12. package/lib/agents/add_bgm_agent.js +3 -3
  13. package/lib/agents/combine_audio_files_agent.js +88 -42
  14. package/lib/agents/index.d.ts +2 -1
  15. package/lib/agents/index.js +2 -1
  16. package/lib/agents/tavily_agent.d.ts +15 -0
  17. package/lib/agents/tavily_agent.js +130 -0
  18. package/lib/cli/commands/audio/builder.d.ts +2 -0
  19. package/lib/cli/commands/image/builder.d.ts +2 -0
  20. package/lib/cli/commands/movie/builder.d.ts +2 -0
  21. package/lib/cli/commands/movie/handler.js +1 -6
  22. package/lib/cli/commands/pdf/builder.d.ts +2 -0
  23. package/lib/cli/commands/translate/builder.d.ts +2 -0
  24. package/lib/cli/common.d.ts +2 -0
  25. package/lib/cli/common.js +6 -0
  26. package/lib/cli/helpers.d.ts +7 -1
  27. package/lib/cli/helpers.js +30 -3
  28. package/lib/methods/index.d.ts +1 -1
  29. package/lib/methods/index.js +1 -1
  30. package/lib/methods/mulmo_presentation_style.d.ts +14 -0
  31. package/lib/methods/mulmo_presentation_style.js +70 -0
  32. package/lib/methods/mulmo_studio_context.d.ts +17 -0
  33. package/lib/methods/mulmo_studio_context.js +30 -2
  34. package/lib/tools/deep_research.d.ts +2 -0
  35. package/lib/tools/deep_research.js +265 -0
  36. package/lib/types/index.d.ts +0 -1
  37. package/lib/types/index.js +0 -1
  38. package/lib/types/schema.d.ts +101 -55
  39. package/lib/types/schema.js +3 -3
  40. package/lib/types/type.d.ts +5 -1
  41. package/lib/utils/ffmpeg_utils.d.ts +1 -0
  42. package/lib/utils/ffmpeg_utils.js +10 -0
  43. package/lib/utils/file.d.ts +7 -4
  44. package/lib/utils/file.js +24 -12
  45. package/lib/utils/preprocess.d.ts +0 -9
  46. package/lib/utils/preprocess.js +4 -10
  47. package/lib/utils/prompt.d.ts +3 -0
  48. package/lib/utils/prompt.js +52 -0
  49. package/package.json +11 -10
  50. package/assets/music/StarsBeyondEx.mp3 +0 -0
package/assets/templates/text_and_image.json
@@ -0,0 +1,6 @@
+ {
+ "title": "Text and Image",
+ "description": "Template for Text and Image Script.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/text_only.json
@@ -0,0 +1,6 @@
+ {
+ "title": "Text Only",
+ "description": "Template for Text Only Script.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "scriptName": "text_only_template.json"
+ }
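
Note: both templates share the same shape (title, description, systemPrompt, scriptName). A minimal sketch of loading one of them; the loadTemplate helper and its path resolution are illustrative assumptions, not part of the package:

    import fs from "fs";
    import path from "path";

    // Shape of the template files added above.
    type ScriptTemplate = {
      title: string;
      description: string;
      systemPrompt: string;
      scriptName: string;
    };

    // Hypothetical loader; the package may resolve template paths differently.
    const loadTemplate = (name: string): ScriptTemplate => {
      const file = path.resolve("assets/templates", `${name}.json`);
      return JSON.parse(fs.readFileSync(file, "utf8")) as ScriptTemplate;
    };

    // The systemPrompt is meant to be fed to an LLM together with the
    // script JSON named by scriptName.
    const template = loadTemplate("text_only");
    console.log(template.title, "->", template.scriptName);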
package/lib/actions/audio.d.ts
@@ -1,5 +1,7 @@
  import "dotenv/config";
  import type { CallbackFunction } from "graphai";
- import { MulmoStudioContext } from "../types/index.js";
+ import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
+ export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
  export declare const audioFilePath: (context: MulmoStudioContext) => string;
- export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
package/lib/actions/audio.js
@@ -1,5 +1,6 @@
  import "dotenv/config";
  import { GraphAI } from "graphai";
+ import { TaskManager } from "graphai/lib/task_manager.js";
  import * as agents from "@graphai/vanilla";
  import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
  import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- import { MulmoScriptMethods } from "../methods/index.js";
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
+ import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -24,7 +25,7 @@ const provider_to_agent = {
  elevenlabs: "ttsElevenlabsAgent",
  mock: "mediaMockAgent",
  };
- const getAudioPath = (context, beat, audioFile, audioDirPath) => {
+ const getAudioPath = (context, beat, audioFile) => {
  if (beat.audio?.type === "audio") {
  const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
  if (path) {
@@ -35,34 +36,48 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
  if (beat.text === undefined || beat.text === "") {
  return undefined; // It indicates that the audio is not needed.
  }
- return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
+ return audioFile;
+ };
+ const getAudioParam = (presentationStyle, beat) => {
+ const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
+ // Use speaker-specific provider if available, otherwise fall back to script-level provider
+ const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+ const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
+ return { voiceId, provider, speechOptions };
+ };
+ export const getBeatAudioPath = (text, context, beat, lang) => {
+ const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+ const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
+ const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+ const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
+ const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
+ return getAudioPath(context, beat, audioFile);
  };
  const preprocessor = (namedInputs) => {
- const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
- const { lang } = context;
- const speaker = context.studio.script.speechParams.speakers[beat.speaker];
- const voiceId = speaker.voiceId;
- const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
+ const { beat, studioBeat, multiLingual, context } = namedInputs;
+ const { lang, presentationStyle } = context;
  const text = localizedText(beat, multiLingual, lang);
- // Use speaker-specific provider if available, otherwise fall back to script-level provider
- const provider = speaker.provider ?? context.studio.script.speechParams.provider;
- const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
- const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
- const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
- studioBeat.audioFile = audioPath;
+ const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+ const audioPath = getBeatAudioPath(text, context, beat, lang);
+ studioBeat.audioFile = audioPath; // TODO
  const needsTTS = !beat.audio && audioPath !== undefined;
  return {
  ttsAgent: provider_to_agent[provider],
- studioBeat,
+ text,
  voiceId,
  speechOptions,
  audioPath,
- text,
+ studioBeat,
  needsTTS,
  };
  };
  const graph_tts = {
  nodes: {
+ beat: {},
+ studioBeat: {},
+ multiLingual: {},
+ context: {},
+ __mapIndex: {},
  preprocessor: {
  agent: preprocessor,
  inputs: {
@@ -70,7 +85,6 @@ const graph_tts = {
  studioBeat: ":studioBeat",
  multiLingual: ":multiLingual",
  context: ":context",
- audioDirPath: ":audioDirPath",
  },
  },
  tts: {
@@ -100,17 +114,13 @@ const graph_data = {
  audioArtifactFilePath: {},
  audioCombinedFilePath: {},
  outputStudioFilePath: {},
- audioDirPath: {},
- audioSegmentDirPath: {},
  musicFile: {},
  map: {
  agent: "mapAgent",
  inputs: {
  rows: ":context.studio.script.beats",
  studioBeat: ":context.studio.beats",
- multiLingual: ":context.studio.multiLingual",
- audioDirPath: ":audioDirPath",
- audioSegmentDirPath: ":audioSegmentDirPath",
+ multiLingual: ":context.multiLingual",
  context: ":context",
  },
  params: {
@@ -122,7 +132,7 @@ const graph_data = {
  combineFiles: {
  agent: "combineAudioFilesAgent",
  inputs: {
- map: ":map",
+ onComplete: ":map",
  context: ":context",
  combinedFileName: ":audioCombinedFilePath",
  },
@@ -141,7 +151,7 @@ const graph_data = {
  wait: ":combineFiles",
  voiceFile: ":audioCombinedFilePath",
  outputFile: ":audioArtifactFilePath",
- script: ":context.studio.script",
+ context: ":context",
  params: {
  musicFile: ":musicFile",
  },
@@ -172,49 +182,80 @@ export const audioFilePath = (context) => {
  const { outDirPath } = fileDirs;
  return getAudioArtifactFilePath(outDirPath, studio.filename);
  };
+ const getConcurrency = (context) => {
+ // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+ const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
+ const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
+ return provider === "nijivoice" || provider === "elevenlabs";
+ });
+ return hasLimitedConcurrencyProvider ? 1 : 8;
+ };
+ const audioAgents = {
+ ...vanillaAgents,
+ fileWriteAgent,
+ ttsOpenaiAgent,
+ ttsNijivoiceAgent,
+ ttsGoogleAgent,
+ ttsElevenlabsAgent,
+ addBGMAgent,
+ combineAudioFilesAgent,
+ };
+ export const generateBeatAudio = async (index, context, callbacks) => {
+ try {
+ MulmoStudioContextMethods.setSessionState(context, "audio", true);
+ const { studio, fileDirs } = context;
+ const { outDirPath, audioDirPath } = fileDirs;
+ const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+ mkdir(outDirPath);
+ mkdir(audioSegmentDirPath);
+ const taskManager = new TaskManager(getConcurrency(context));
+ const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+ graph.injectValue("__mapIndex", index);
+ graph.injectValue("beat", context.studio.script.beats[index]);
+ graph.injectValue("studioBeat", context.studio.beats[index]);
+ graph.injectValue("multiLingual", context.multiLingual);
+ graph.injectValue("context", context);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
+ await graph.run();
+ }
+ finally {
+ MulmoStudioContextMethods.setSessionState(context, "audio", false);
+ }
+ };
  export const audio = async (context, callbacks) => {
  try {
  MulmoStudioContextMethods.setSessionState(context, "audio", true);
  const { studio, fileDirs, lang } = context;
  const { outDirPath, audioDirPath } = fileDirs;
  const audioArtifactFilePath = audioFilePath(context);
- const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
- const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
+ const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+ const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
  mkdir(outDirPath);
  mkdir(audioSegmentDirPath);
- // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
- const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
- const provider = speaker.provider ?? studio.script.speechParams.provider;
- return provider === "nijivoice" || provider === "elevenlabs";
- });
- graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
- const graph = new GraphAI(graph_data, {
- ...vanillaAgents,
- fileWriteAgent,
- ttsOpenaiAgent,
- ttsNijivoiceAgent,
- ttsGoogleAgent,
- ttsElevenlabsAgent,
- addBGMAgent,
- combineAudioFilesAgent,
- }, { agentFilters });
+ const taskManager = new TaskManager(getConcurrency(context));
+ const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
  graph.injectValue("context", context);
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
- graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
- graph.injectValue("audioDirPath", audioDirPath);
- graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
+ graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
  if (callbacks) {
  callbacks.forEach((callback) => {
  graph.registerCallback(callback);
  });
  }
- await graph.run();
+ const result = await graph.run();
  writingMessage(audioCombinedFilePath);
+ MulmoStudioContextMethods.setSessionState(context, "audio", false);
+ return result.combineFiles;
  }
- finally {
+ catch (__error) {
  MulmoStudioContextMethods.setSessionState(context, "audio", false);
+ throw __error;
  }
  };
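
Note: two behavioral changes in audio.js above are easy to miss. First, the TTS cache key is now built with [text, voiceId, instruction, speed, provider].join(":") instead of bare template-literal concatenation, and path handling moved into getAudioFilePath/resolveDirPath. Second, concurrency is now supplied through a GraphAI TaskManager (1 for nijivoice/elevenlabs, 8 otherwise) rather than by mutating the module-level graph_data.concurrency, so concurrent runs no longer share mutable graph state. A minimal sketch of the key derivation; the sha256-based text2hash stand-in is an assumption, since utils/utils.js is not shown in this diff:

    import { createHash } from "crypto";

    // Assumed stand-in for the package's text2hash (implementation not in this diff).
    const text2hash = (input: string): string =>
      createHash("sha256").update(input).digest("hex");

    // Mirrors getBeatAudioPath's hash_string from the diff above.
    const audioCacheKey = (text: string, voiceId: string, provider: string, instruction = "", speed = 1.0): string => {
      // join(":") delimits the fields, so ("ab", "c") and ("a", "bc") no longer
      // collapse to the same key as they could under the old
      // `${text}${voiceId}...` concatenation.
      const hash_string = [text, voiceId, instruction, speed, provider].join(":");
      return text2hash(hash_string);
    };

    console.log(audioCacheKey("Hello world", "alloy", "openai"));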
package/lib/actions/captions.d.ts
@@ -1,3 +1,3 @@
  import { MulmoStudioContext } from "../types/index.js";
  import type { CallbackFunction } from "graphai";
- export declare const captions: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const captions: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
package/lib/actions/captions.js
@@ -26,11 +26,11 @@ const graph_data = {
  const { fileDirs } = namedInputs.context;
  const { caption } = context;
  const { imageDirPath } = fileDirs;
- const { canvasSize } = context.studio.script;
+ const { canvasSize } = context.presentationStyle;
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
  const template = getHTMLFile("caption");
  const text = (() => {
- const multiLingual = context.studio.multiLingual;
+ const multiLingual = context.multiLingual;
  if (caption && multiLingual) {
  return multiLingual[index].multiLingualTexts[caption].text;
  }
@@ -63,18 +63,21 @@ const graph_data = {
  },
  };
  export const captions = async (context, callbacks) => {
- try {
- MulmoStudioContextMethods.setSessionState(context, "caption", true);
- const graph = new GraphAI(graph_data, { ...vanillaAgents });
- graph.injectValue("context", context);
- if (callbacks) {
- callbacks.forEach((callback) => {
- graph.registerCallback(callback);
- });
+ if (context.caption) {
+ try {
+ MulmoStudioContextMethods.setSessionState(context, "caption", true);
+ const graph = new GraphAI(graph_data, { ...vanillaAgents });
+ graph.injectValue("context", context);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
+ await graph.run();
+ }
+ finally {
+ MulmoStudioContextMethods.setSessionState(context, "caption", false);
  }
- await graph.run();
- }
- finally {
- MulmoStudioContextMethods.setSessionState(context, "caption", false);
  }
+ return context;
  };
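
Note: audio, captions, and images now resolve to a MulmoStudioContext instead of void, and captions returns immediately (without building a graph) when context.caption is unset. That makes the actions composable; a sketch of the call pattern this enables (the pipeline wrapper and the deep-import paths are illustrative, not part of the package's documented API):

    import { audio } from "mulmocast/lib/actions/audio.js";
    import { captions } from "mulmocast/lib/actions/captions.js";
    import { images } from "mulmocast/lib/actions/images.js";
    import type { MulmoStudioContext } from "mulmocast/lib/types/index.js";

    // Each action returns the (possibly updated) context, so the steps chain.
    const pipeline = async (context: MulmoStudioContext): Promise<MulmoStudioContext> => {
      const withImages = await images(context);
      const withAudio = await audio(withImages);
      return captions(withAudio); // returns context unchanged unless context.caption is set
    };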
package/lib/actions/images.d.ts
@@ -4,8 +4,6 @@ export declare const imagePreprocessAgent: (namedInputs: {
  context: MulmoStudioContext;
  beat: MulmoBeat;
  index: number;
- suffix: string;
- imageDirPath: string;
  imageAgentInfo: Text2ImageAgentInfo;
  imageRefs: Record<string, string>;
  }) => Promise<{
@@ -32,8 +30,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
  };
  movieFile: string | undefined;
  imagePath: string | undefined;
+ referenceImage: string | undefined;
  } | {
+ imagePath: string;
  images: string[];
+ imageFromMovie: boolean;
  imageParams: {
  model?: string | undefined;
  style?: string | undefined;
@@ -81,7 +82,9 @@ export declare const imagePreprocessAgent: (namedInputs: {
  };
  movieFile: string | undefined;
  imagePath: string;
+ referenceImage: string;
  prompt: string;
  }>;
- export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
+ export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
  export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js
@@ -1,41 +1,43 @@
  import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger } from "graphai";
+ import { TaskManager } from "graphai/lib/task_manager.js";
  import * as agents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
+ import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
- import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
+ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { imagePlugins } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
  const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
- const htmlStyle = (script, beat) => {
+ import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
+ const htmlStyle = (context, beat) => {
  return {
- canvasSize: MulmoScriptMethods.getCanvasSize(script),
- textSlideStyle: MulmoScriptMethods.getTextSlideStyle(script, beat),
+ canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
+ textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
  };
  };
  export const imagePreprocessAgent = async (namedInputs) => {
- const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
+ const { context, beat, index, imageAgentInfo, imageRefs } = namedInputs;
  const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
- const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
+ const imagePath = getBeatPngImagePath(context, index);
  const returnValue = {
  imageParams,
- movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
+ movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
  };
  if (beat.image) {
  const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
  if (plugin) {
  try {
  MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
- const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
+ const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
  const path = await plugin.process(processorParams);
  // undefined prompt indicates that image generation is not needed
- return { imagePath: path, ...returnValue };
+ return { imagePath: path, referenceImage: path, ...returnValue };
  }
  finally {
  MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
@@ -49,17 +51,16 @@ export const imagePreprocessAgent = async (namedInputs) => {
  return sources.filter((source) => source !== undefined);
  })();
  if (beat.moviePrompt && !beat.imagePrompt) {
- return { ...returnValue, images }; // no image prompt, only movie prompt
+ return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
  }
  const prompt = imagePrompt(beat, imageParams.style);
- return { imagePath, prompt, ...returnValue, images };
+ return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
  };
  const beat_graph_data = {
  version: 0.5,
  concurrency: 4,
  nodes: {
  context: {},
- imageDirPath: {},
  imageAgentInfo: {},
  movieAgentInfo: {},
  imageRefs: {},
@@ -71,8 +72,6 @@ const beat_graph_data = {
  context: ":context",
  beat: ":beat",
  index: ":__mapIndex",
- suffix: "p",
- imageDirPath: ":imageDirPath",
  imageAgentInfo: ":imageAgentInfo",
  imageRefs: ":imageRefs",
  },
@@ -93,7 +92,7 @@ const beat_graph_data = {
  params: {
  model: ":preprocessor.imageParams.model",
  moderation: ":preprocessor.imageParams.moderation",
- canvasSize: ":context.studio.script.canvasSize",
+ canvasSize: ":context.presentationStyle.canvasSize",
  },
  },
  defaultValue: {},
@@ -104,24 +103,37 @@ const beat_graph_data = {
  inputs: {
  onComplete: ":imageGenerator", // to wait for imageGenerator to finish
  prompt: ":beat.moviePrompt",
- imagePath: ":preprocessor.imagePath",
+ imagePath: ":preprocessor.referenceImage",
  file: ":preprocessor.movieFile",
  studio: ":context.studio", // for cache
  mulmoContext: ":context", // for fileCacheAgentFilter
  index: ":__mapIndex", // for cache
  sessionType: "movie", // for cache
  params: {
- model: ":context.studio.script.movieParams.model",
+ model: ":context.presentationStyle.movieParams.model",
  duration: ":beat.duration",
- canvasSize: ":context.studio.script.canvasSize",
+ canvasSize: ":context.presentationStyle.canvasSize",
  },
  },
  defaultValue: {},
  },
+ imageFromMovie: {
+ if: ":preprocessor.imageFromMovie",
+ agent: async (namedInputs) => {
+ await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
+ return { generatedImage: true };
+ },
+ inputs: {
+ onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+ imageFile: ":preprocessor.imagePath",
+ movieFile: ":preprocessor.movieFile",
+ },
+ defaultValue: { generatedImage: false },
+ },
  output: {
  agent: "copyAgent",
  inputs: {
- onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+ onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
  imageFile: ":preprocessor.imagePath",
  movieFile: ":preprocessor.movieFile",
  },
@@ -138,7 +150,6 @@ const graph_data = {
  concurrency: 4,
  nodes: {
  context: {},
- imageDirPath: {},
  imageAgentInfo: {},
  movieAgentInfo: {},
  outputStudioFilePath: {},
@@ -150,7 +161,6 @@ const graph_data = {
  context: ":context",
  imageAgentInfo: ":imageAgentInfo",
  movieAgentInfo: ":movieAgentInfo",
- imageDirPath: ":imageDirPath",
  imageRefs: ":imageRefs",
  },
  isResult: true,
@@ -185,7 +195,10 @@ const graph_data = {
  }
  }
  });
- return { studio };
+ return {
+ ...context,
+ studio,
+ };
  },
  inputs: {
  array: ":map.output",
@@ -217,7 +230,6 @@ const googleAuth = async () => {
  }
  };
  const graphOption = async (context) => {
- const { studio } = context;
  const agentFilters = [
  {
  name: "fileCacheAgentFilter",
@@ -225,12 +237,14 @@ const graphOption = async (context) => {
  nodeIds: ["imageGenerator", "movieGenerator"],
  },
  ];
+ const taskManager = new TaskManager(getConcurrency(context));
  const options = {
  agentFilters,
+ taskManager,
  };
- const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
  // We need to get google's auth token only if the google is the text2image provider.
- if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
+ if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
  GraphAILogger.log("google was specified as text2image engine");
  const token = await googleAuth();
  options.config = {
@@ -246,13 +260,10 @@ const graphOption = async (context) => {
  }
  return options;
  };
- const prepareGenerateImages = async (context) => {
- const { studio, fileDirs } = context;
- const { outDirPath, imageDirPath } = fileDirs;
- mkdir(`${imageDirPath}/${studio.filename}`);
- const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script, context.dryRun);
+ // TODO: unit test
+ export const getImageRefs = async (context) => {
  const imageRefs = {};
- const images = studio.script.imageParams?.images;
+ const images = context.presentationStyle.imageParams?.images;
  if (images) {
  await Promise.all(Object.keys(images).map(async (key) => {
  const image = images[key];
@@ -283,12 +294,21 @@ const prepareGenerateImages = async (context) => {
  return "png"; // default
  }
  })();
- const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
+ const imagePath = getReferenceImagePath(context, key, extension);
  await fs.promises.writeFile(imagePath, buffer);
  imageRefs[key] = imagePath;
  }
  }));
  }
+ return imageRefs;
+ };
+ const prepareGenerateImages = async (context) => {
+ const { studio } = context;
+ const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
+ const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+ mkdir(imageProjectDirPath);
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
+ const imageRefs = await getImageRefs(context);
  GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
  const injections = {
  context,
297
317
  agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
298
318
  },
299
319
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
300
- imageDirPath,
301
320
  imageRefs,
302
321
  };
303
322
  return injections;
304
323
  };
305
- const generateImages = async (context, callbacks) => {
306
- const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(context.studio.script);
324
+ const getConcurrency = (context) => {
325
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
307
326
  if (imageAgentInfo.provider === "openai") {
308
327
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
309
328
  // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
310
329
  // gpt-image-1:3,000,000 TPM、150 images per minute
311
- graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
330
+ return imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
312
331
  }
332
+ return 4;
333
+ };
334
+ const generateImages = async (context, callbacks) => {
313
335
  const options = await graphOption(context);
314
336
  const injections = await prepareGenerateImages(context);
315
337
  const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
@@ -327,10 +349,13 @@ const generateImages = async (context, callbacks) => {
  export const images = async (context, callbacks) => {
  try {
  MulmoStudioContextMethods.setSessionState(context, "image", true);
- await generateImages(context, callbacks);
+ const newContext = await generateImages(context, callbacks);
+ MulmoStudioContextMethods.setSessionState(context, "image", false);
+ return newContext;
  }
- finally {
+ catch (error) {
  MulmoStudioContextMethods.setSessionState(context, "image", false);
+ throw error;
  }
  };
  export const generateBeatImage = async (index, context, callbacks) => {
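
Note: the new imageFromMovie node in images.js fills in a beat's still image by extracting a frame from its generated movie via extractImageFromMovie (utils/ffmpeg_utils.js, +10 lines in this release, body not shown above). A minimal sketch of what such a helper could look like, shelling out to the ffmpeg CLI; this is an assumption about the implementation, not the package's actual code:

    import { execFile } from "child_process";
    import { promisify } from "util";

    const execFileAsync = promisify(execFile);

    // Assumed shape of extractImageFromMovie(movieFile, imageFile):
    // write the first video frame of the movie out as a still image.
    export const extractImageFromMovie = async (movieFile: string, imageFile: string): Promise<void> => {
      await execFileAsync("ffmpeg", [
        "-y", // overwrite the output file if it already exists
        "-i", movieFile, // input movie generated by movieGenerator
        "-frames:v", "1", // emit exactly one video frame
        imageFile, // output still image path
      ]);
    };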