mulmocast 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +18 -3
  2. package/assets/templates/ghibli_shorts.json +34 -0
  3. package/assets/templates/shorts.json +18 -0
  4. package/assets/templates/trailer.json +25 -0
  5. package/lib/actions/audio.d.ts +2 -1
  6. package/lib/actions/audio.js +35 -17
  7. package/lib/actions/captions.js +5 -5
  8. package/lib/actions/images.d.ts +2 -1
  9. package/lib/actions/images.js +90 -58
  10. package/lib/actions/movie.js +53 -16
  11. package/lib/actions/pdf.js +3 -3
  12. package/lib/actions/translate.d.ts +2 -1
  13. package/lib/actions/translate.js +21 -16
  14. package/lib/agents/combine_audio_files_agent.js +4 -0
  15. package/lib/agents/image_google_agent.d.ts +4 -1
  16. package/lib/agents/image_google_agent.js +3 -2
  17. package/lib/agents/image_openai_agent.d.ts +5 -3
  18. package/lib/agents/image_openai_agent.js +35 -7
  19. package/lib/agents/index.d.ts +2 -1
  20. package/lib/agents/index.js +2 -1
  21. package/lib/agents/movie_google_agent.d.ts +9 -2
  22. package/lib/agents/movie_google_agent.js +24 -16
  23. package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
  24. package/lib/agents/tts_elevenlabs_agent.js +60 -0
  25. package/lib/agents/tts_google_agent.js +1 -1
  26. package/lib/agents/tts_nijivoice_agent.js +3 -2
  27. package/lib/agents/tts_openai_agent.js +1 -1
  28. package/lib/cli/commands/audio/handler.js +4 -1
  29. package/lib/cli/commands/image/handler.js +4 -1
  30. package/lib/cli/commands/movie/handler.js +4 -1
  31. package/lib/cli/commands/pdf/handler.js +4 -1
  32. package/lib/cli/commands/translate/handler.js +4 -1
  33. package/lib/cli/helpers.d.ts +3 -3
  34. package/lib/cli/helpers.js +38 -20
  35. package/lib/index.d.ts +5 -0
  36. package/lib/index.js +5 -0
  37. package/lib/methods/mulmo_media_source.d.ts +1 -0
  38. package/lib/methods/mulmo_media_source.js +12 -0
  39. package/lib/methods/mulmo_script.d.ts +1 -1
  40. package/lib/methods/mulmo_script.js +9 -5
  41. package/lib/methods/mulmo_studio_context.d.ts +5 -0
  42. package/lib/methods/mulmo_studio_context.js +23 -0
  43. package/lib/types/index.d.ts +1 -0
  44. package/lib/types/index.js +1 -0
  45. package/lib/types/schema.d.ts +1513 -290
  46. package/lib/types/schema.js +26 -35
  47. package/lib/types/type.d.ts +4 -1
  48. package/lib/utils/file.d.ts +5 -15
  49. package/lib/utils/file.js +14 -21
  50. package/lib/utils/filters.js +4 -4
  51. package/lib/utils/image_plugins/beat.d.ts +4 -0
  52. package/lib/utils/image_plugins/beat.js +7 -0
  53. package/lib/utils/image_plugins/image.d.ts +1 -1
  54. package/lib/utils/image_plugins/index.d.ts +2 -1
  55. package/lib/utils/image_plugins/index.js +2 -1
  56. package/lib/utils/image_plugins/movie.d.ts +1 -1
  57. package/lib/utils/image_plugins/source.js +2 -2
  58. package/lib/utils/preprocess.d.ts +26 -23
  59. package/lib/utils/preprocess.js +4 -0
  60. package/package.json +8 -8
  61. package/scripts/templates/movie_prompts_no_text_template.json +50 -0
  62. package/scripts/templates/shorts_template.json +52 -0
package/README.md CHANGED
@@ -90,11 +90,26 @@ Create a `.env` file in your project directory with the following API keys:
  ```bash
  OPENAI_API_KEY=your_openai_api_key
  ```
- ### Optional
+
+ #### (Optional) For the advanced image generation model
  ```bash
  DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
- GOOGLE_PROJECT_ID=your_google_project_id # for Google's image generation model
- NIJIVOICE_API_KEY=your_nijivoice_api_key # for Nijivoice's TTS model
+ ```
+
+ #### (Optional) For Google's image generation model
+ ```bash
+ GOOGLE_PROJECT_ID=your_google_project_id
+ ```
+
+ See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
+
+ #### (Optional) For Nijivoice's TTS model
+ ```bash
+ NIJIVOICE_API_KEY=your_nijivoice_api_key
+ ```
+
+ #### (Optional) to access web in mulmo tool
+ ```bash
  BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
  ```
 
package/assets/templates/ghibli_shorts.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "title": "Ghibli comic style",
+   "description": "Template for Ghibli-style comic presentation.",
+   "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Presenter": { "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62", "speechOptions": { "speed": 1.5 } }
+       }
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/shorts.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "title": "Short movie template",
+   "description": "Template for Youtube shorts.",
+   "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0"
+     },
+     "canvasSize": {
+       "width": 720,
+       "height": 1280
+     },
+     "imageParams": {
+       "style": "<style>Photo realistic, cinematic.</style>"
+     }
+   },
+   "scriptName": "movie_prompts_template.json"
+ }
package/assets/templates/trailer.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "title": "Movie Trailer template",
+   "description": "Template for A Movie Trailer.",
+   "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0"
+     },
+     "canvasSize": {
+       "width": 1280,
+       "height": 720
+     },
+     "imageParams": {
+       "style": "<style>Photo realistic, cinematic.</style>"
+     },
+     "audioParams": {
+       "padding": 0.0,
+       "introPadding": 0.0,
+       "closingPadding": 0.0,
+       "outroPadding": 2.5,
+       "bgm": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/bgms/trailer_dramatic.mp3" }
+     }
+   },
+   "scriptName": "movie_prompts_no_text_template.json"
+ }
package/lib/actions/audio.d.ts CHANGED
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const audio: (context: MulmoStudioContext) => Promise<void>;
+ export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
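
The same optional `callbacks` parameter is added to `images` (and, per the file list above, `translate`); internally each action just forwards the array to `graph.registerCallback` before `graph.run()`. A minimal caller sketch, assuming `audio` and the context type are reachable from the package root (the diff shows new root exports in `lib/index.js` but not their names) and leaving the callback payload to graphai's `CallbackFunction`:

```typescript
import type { CallbackFunction } from "graphai";
import { audio, type MulmoStudioContext } from "mulmocast"; // assumed root re-exports

declare const context: MulmoStudioContext; // built elsewhere, e.g. by the CLI helpers

// Hypothetical progress hook; the payload shape is defined by graphai, not this diff.
const onUpdate: CallbackFunction = (log) => {
  console.log("graph update:", log);
};

await audio(context, [onUpdate]); // callbacks stay optional: audio(context) still works
```
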
package/lib/actions/audio.js CHANGED
@@ -6,12 +6,14 @@ import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
  import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
+ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
+ import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+ import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
  const vanillaAgents = agents.default ?? agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -19,10 +21,11 @@ const provider_to_agent = {
    nijivoice: "ttsNijivoiceAgent",
    openai: "ttsOpenaiAgent",
    google: "ttsGoogleAgent",
+   elevenlabs: "ttsElevenlabsAgent",
  };
  const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    if (beat.audio?.type === "audio") {
-     const path = resolveMediaSource(beat.audio.source, context);
+     const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
      if (path) {
        return path;
      }
@@ -34,18 +37,21 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
  };
  const preprocessor = (namedInputs) => {
-   const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
+   const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
    const { lang } = context;
-   const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
+   const speaker = context.studio.script.speechParams.speakers[beat.speaker];
+   const voiceId = speaker.voiceId;
    const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
    const text = localizedText(beat, multiLingual, lang);
-   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
-   const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
+   // Use speaker-specific provider if available, otherwise fall back to script-level provider
+   const provider = speaker.provider ?? context.studio.script.speechParams.provider;
+   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
+   const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
    const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
    studioBeat.audioFile = audioPath;
    const needsTTS = !beat.audio && audioPath !== undefined;
    return {
-     ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
+     ttsAgent: provider_to_agent[provider],
      studioBeat,
      voiceId,
      speechOptions,
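
The cache-relevant change in this hunk is twofold: the resolved provider now participates in the segment hash, and the beat index no longer does, so reordering beats keeps cached audio valid while switching providers invalidates it. A condensed sketch of the two rules, with hypothetical simplified shapes standing in for the real schema types:

```typescript
// Hypothetical, trimmed shapes for illustration; the real ones live in lib/types/schema.
type Speaker = { voiceId: string; provider?: string };
type SpeechParams = { provider: string; speakers: Record<string, Speaker> };

// Speaker-level provider wins; otherwise the script-level default applies.
const resolveProvider = (params: SpeechParams, name: string): string =>
  params.speakers[name].provider ?? params.provider;

// The cache key folds in the provider (and no longer the beat index), so the
// same text rendered by a different provider produces a fresh audio file.
const cacheKey = (text: string, voiceId: string, speed: number, provider: string): string =>
  `${text}${voiceId}${speed}${provider}`;
```
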
@@ -62,7 +68,6 @@ const graph_tts = {
        beat: ":beat",
        studioBeat: ":studioBeat",
        multiLingual: ":multiLingual",
-       index: ":__mapIndex",
        context: ":context",
        audioDirPath: ":audioDirPath",
      },
@@ -74,7 +79,7 @@ const graph_tts = {
        text: ":preprocessor.text",
        file: ":preprocessor.audioPath",
        force: ":context.force",
-       studio: ":context.studio", // for cache
+       mulmoContext: ":context", // for cache
        index: ":__mapIndex", // for cache
        sessionType: "audio", // for cache
        params: {
@@ -96,6 +101,7 @@ const graph_data = {
    outputStudioFilePath: {},
    audioDirPath: {},
    audioSegmentDirPath: {},
+   musicFile: {},
    map: {
      agent: "mapAgent",
      inputs: {
@@ -130,14 +136,14 @@ const graph_data = {
    },
    addBGM: {
      agent: "addBGMAgent",
-     params: {
-       musicFile: process.env.PATH_BGM ?? defaultBGMPath,
-     },
      inputs: {
        wait: ":combineFiles",
        voiceFile: ":audioCombinedFilePath",
        outputFile: ":audioArtifactFilePath",
        script: ":context.studio.script",
+       params: {
+         musicFile: ":musicFile",
+       },
      },
      isResult: true,
    },
@@ -160,9 +166,9 @@ const agentFilters = [
      nodeIds: ["tts"],
    },
  ];
- export const audio = async (context) => {
+ export const audio = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+     MulmoStudioContextMethods.setSessionState(context, "audio", true);
      const { studio, fileDirs, lang } = context;
      const { outDirPath, audioDirPath } = fileDirs;
      const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -171,13 +177,19 @@ export const audio = async (context) => {
      const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
      mkdir(outDirPath);
      mkdir(audioSegmentDirPath);
-     graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+     const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
+       const provider = speaker.provider ?? studio.script.speechParams.provider;
+       return provider === "nijivoice" || provider === "elevenlabs";
+     });
+     graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
      const graph = new GraphAI(graph_data, {
        ...vanillaAgents,
        fileWriteAgent,
        ttsOpenaiAgent,
        ttsNijivoiceAgent,
        ttsGoogleAgent,
+       ttsElevenlabsAgent,
        addBGMAgent,
        combineAudioFilesAgent,
      }, { agentFilters });
@@ -187,10 +199,16 @@ export const audio = async (context) => {
      graph.injectValue("outputStudioFilePath", outputStudioFilePath);
      graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
      graph.injectValue("audioDirPath", audioDirPath);
+     graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath);
+     if (callbacks) {
+       callbacks.forEach((callback) => {
+         graph.registerCallback(callback);
+       });
+     }
      await graph.run();
      writingMessage(audioCombinedFilePath);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+     MulmoStudioContextMethods.setSessionState(context, "audio", false);
    }
  };
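
Two behavioral notes fall out of these audio.js hunks. First, TTS concurrency now drops to 1 if any speaker resolves to a rate-limited provider, not only when the script-level provider is nijivoice. Second, background music is resolved through a three-step precedence chain instead of env-var-or-default. A condensed sketch of both rules (shapes are hypothetical simplifications):

```typescript
// Hypothetical, trimmed speaker shape for illustration.
type Speaker = { voiceId: string; provider?: string };

const limitedProviders = new Set(["nijivoice", "elevenlabs"]);

// Serial TTS if ANY speaker lands on a rate-limited provider; otherwise 8-way parallel.
const ttsConcurrency = (speakers: Speaker[], scriptProvider: string): number =>
  speakers.some((s) => limitedProviders.has(s.provider ?? scriptProvider)) ? 1 : 8;

// BGM precedence: script-level audioParams.bgm (already resolved to a path),
// then the PATH_BGM environment variable, then the bundled default.
const pickBgm = (resolvedScriptBgm: string | undefined, defaultBGMPath: string): string =>
  resolvedScriptBgm ?? process.env.PATH_BGM ?? defaultBGMPath;
```
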
package/lib/actions/captions.js CHANGED
@@ -2,7 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const graph_data = {
    version: 0.5,
@@ -22,7 +22,7 @@ const graph_data = {
      agent: async (namedInputs) => {
        const { beat, context, index } = namedInputs;
        try {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
          const { fileDirs } = namedInputs.context;
          const { caption } = context;
          const { imageDirPath } = fileDirs;
@@ -47,7 +47,7 @@ const graph_data = {
          return imagePath;
        }
        finally {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, false);
        }
      },
      inputs: {
@@ -64,12 +64,12 @@ const graph_data = {
  };
  export const captions = async (context) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+     MulmoStudioContextMethods.setSessionState(context, "caption", true);
      const graph = new GraphAI(graph_data, { ...vanillaAgents });
      graph.injectValue("context", context);
      await graph.run();
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+     MulmoStudioContextMethods.setSessionState(context, "caption", false);
    }
  };
package/lib/actions/images.d.ts CHANGED
@@ -1,2 +1,3 @@
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const images: (context: MulmoStudioContext) => Promise<void>;
+ export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -15,7 +15,6 @@ const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const htmlStyle = (script, beat) => {
    return {
      canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -25,37 +24,37 @@ const htmlStyle = (script, beat) => {
  const imagePreprocessAgent = async (namedInputs) => {
    const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
    const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
-   if (!imageParams.size) {
-     const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
-     imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
-   }
    const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
    const returnValue = {
-     aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
      imageParams,
+     movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
    };
    if (beat.image) {
      const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
      if (plugin) {
        try {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+         MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
          const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
          const path = await plugin.process(processorParams);
          // undefined prompt indicates that image generation is not needed
-         return { path, ...returnValue };
+         return { imagePath: path, ...returnValue };
        }
        finally {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+         MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
        }
      }
    }
-   const prompt = imagePrompt(beat, imageParams.style);
+   // images for "edit_image"
    const images = (() => {
      const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
      const sources = imageNames.map((name) => imageRefs[name]);
      return sources.filter((source) => source !== undefined);
    })();
-   return { path: imagePath, prompt, ...returnValue, images };
+   if (beat.moviePrompt && !beat.imagePrompt) {
+     return { ...returnValue, images }; // no image prompt, only movie prompt
+   }
+   const prompt = imagePrompt(beat, imageParams.style);
+   return { imagePath, prompt, ...returnValue, images };
  };
  const graph_data = {
    version: 0.5,
@@ -100,64 +99,53 @@ const graph_data = {
        retry: 3,
        inputs: {
          prompt: ":preprocessor.prompt",
-         file: ":preprocessor.path", // only for fileCacheAgentFilter
+         images: ":preprocessor.images",
+         file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
          text: ":preprocessor.prompt", // only for fileCacheAgentFilter
-         force: ":context.force",
-         studio: ":context.studio", // for cache
-         index: ":__mapIndex", // for cache
-         sessionType: "image", // for cache
+         force: ":context.force", // only for fileCacheAgentFilter
+         mulmoContext: ":context", // for fileCacheAgentFilter
+         index: ":__mapIndex", // for fileCacheAgentFilter
+         sessionType: "image", // for fileCacheAgentFilter
          params: {
            model: ":preprocessor.imageParams.model",
-           size: ":preprocessor.imageParams.size",
            moderation: ":preprocessor.imageParams.moderation",
-           aspectRatio: ":preprocessor.aspectRatio",
-           images: ":preprocessor.images",
+           canvasSize: ":context.studio.script.canvasSize",
          },
        },
        defaultValue: {},
      },
-     prepareMovie: {
-       agent: (namedInputs) => {
-         const { beat, imageDirPath, index, context } = namedInputs;
-         if (beat.moviePrompt) {
-           const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
-           return { movieFile };
-         }
-         return {};
-       },
-       inputs: {
-         result: ":imageGenerator", // to wait for imageGenerator to finish
-         imagePath: ":preprocessor.path",
-         beat: ":beat",
-         imageDirPath: ":imageDirPath",
-         index: ":__mapIndex",
-         context: ":context",
-       },
-     },
      movieGenerator: {
-       if: ":prepareMovie.movieFile",
+       if: ":preprocessor.movieFile",
        agent: "movieGoogleAgent",
        inputs: {
+         onComplete: ":imageGenerator", // to wait for imageGenerator to finish
          prompt: ":beat.moviePrompt",
-         imagePath: ":preprocessor.path",
-         file: ":prepareMovie.movieFile",
+         imagePath: ":preprocessor.imagePath",
+         file: ":preprocessor.movieFile",
          studio: ":context.studio", // for cache
          index: ":__mapIndex", // for cache
          sessionType: "movie", // for cache
          params: {
            model: ":context.studio.script.movieParams.model",
-           aspectRatio: ":preprocessor.aspectRatio",
            duration: ":beat.duration",
+           canvasSize: ":context.studio.script.canvasSize",
          },
        },
        defaultValue: {},
      },
+     onComplete: {
+       agent: "copyAgent",
+       inputs: {
+         onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+         imageFile: ":preprocessor.imagePath",
+         movieFile: ":preprocessor.movieFile",
+       },
+     },
      output: {
        agent: "copyAgent",
        inputs: {
-         onComplete: ":movieGenerator",
-         imageFile: ":preprocessor.path",
-         movieFile: ":prepareMovie.movieFile",
+         imageFile: ":onComplete.imageFile",
+         movieFile: ":onComplete.movieFile",
        },
        isResult: true,
      },
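
Note how removing prepareMovie simplifies the wiring: movieGenerator now takes its target path straight from the preprocessor, and ordering is enforced purely by data dependencies (movieGenerator waits on `:imageGenerator`, the new onComplete node waits on `:movieGenerator`). A stripped-down sketch of just that dependency chain, with node bodies and unrelated inputs omitted:

```typescript
// Dependency chain only; everything else from the real graph is left out.
const pipelineSketch = {
  nodes: {
    imageGenerator: { agent: "imageOpenaiAgent", inputs: { prompt: ":preprocessor.prompt" } },
    movieGenerator: {
      if: ":preprocessor.movieFile", // skipped entirely when the beat has no moviePrompt
      agent: "movieGoogleAgent",
      inputs: { onComplete: ":imageGenerator" }, // runs only after the image exists
    },
    onComplete: {
      agent: "copyAgent",
      inputs: {
        onComplete: ":movieGenerator", // runs after the (possibly skipped) movie step
        imageFile: ":preprocessor.imagePath",
        movieFile: ":preprocessor.movieFile",
      },
    },
    output: {
      agent: "copyAgent",
      inputs: { imageFile: ":onComplete.imageFile", movieFile: ":onComplete.movieFile" },
      isResult: true,
    },
  },
};
```
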
@@ -168,11 +156,26 @@ const graph_data = {
      agent: (namedInputs) => {
        const { array, context } = namedInputs;
        const { studio } = context;
+       const beatIndexMap = {};
        array.forEach((update, index) => {
          const beat = studio.beats[index];
          studio.beats[index] = { ...beat, ...update };
+         const id = studio.script.beats[index].id;
+         if (id) {
+           beatIndexMap[id] = index;
+         }
+       });
+       studio.beats.forEach((studioBeat, index) => {
+         const beat = studio.script.beats[index];
+         if (beat.image?.type === "beat") {
+           if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
+             studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+           }
+           else if (index > 0) {
+             studioBeat.imageFile = studio.beats[index - 1].imageFile;
+           }
+         }
        });
-       // console.log(namedInputs);
        return { studio };
      },
      inputs: {
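
After all per-beat results are merged, a second pass resolves beats whose image is `{ type: "beat" }`: an explicit `id` points at another beat's generated image, otherwise the previous beat's image is reused. The same resolution, extracted as a standalone function with hypothetical minimal types:

```typescript
// Hypothetical, trimmed-down shapes for illustration only.
type ScriptBeat = { id?: string; image?: { type: string; id?: string } };
type StudioBeat = { imageFile?: string };

const resolveBeatImages = (scriptBeats: ScriptBeat[], studioBeats: StudioBeat[]): void => {
  // First pass: index beats by their optional id.
  const beatIndexMap: Record<string, number> = {};
  scriptBeats.forEach((beat, index) => {
    if (beat.id) beatIndexMap[beat.id] = index;
  });
  // Second pass: beats of type "beat" borrow another beat's image file.
  studioBeats.forEach((studioBeat, index) => {
    const beat = scriptBeats[index];
    if (beat.image?.type !== "beat") return;
    if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
      studioBeat.imageFile = studioBeats[beatIndexMap[beat.image.id]].imageFile;
    } else if (index > 0) {
      studioBeat.imageFile = studioBeats[index - 1].imageFile; // default: previous beat
    }
  });
};
```
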
@@ -180,7 +183,7 @@ const graph_data = {
        context: ":context",
      },
    },
-   writeOutout: {
+   writeOutput: {
      // console: { before: true },
      agent: "fileWriteAgent",
      inputs: {
@@ -191,14 +194,20 @@ const graph_data = {
    },
  };
  const googleAuth = async () => {
-   const auth = new GoogleAuth({
-     scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-   });
-   const client = await auth.getClient();
-   const accessToken = await client.getAccessToken();
-   return accessToken.token;
+   try {
+     const auth = new GoogleAuth({
+       scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+     });
+     const client = await auth.getClient();
+     const accessToken = await client.getAccessToken();
+     return accessToken.token;
+   }
+   catch (error) {
+     GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
+     throw error;
+   }
  };
- const generateImages = async (context) => {
+ const generateImages = async (context, callbacks) => {
    const { studio, fileDirs } = context;
    const { outDirPath, imageDirPath } = fileDirs;
    mkdir(`${imageDirPath}/${studio.filename}`);
@@ -248,7 +257,25 @@ const generateImages = async (context) => {
        throw new Error(`Failed to download image: ${image.source.url}`);
      }
      const buffer = Buffer.from(await response.arrayBuffer());
-     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
+     // Detect file extension from Content-Type header or URL
+     const extension = (() => {
+       const contentType = response.headers.get("content-type");
+       if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
+         return "jpg";
+       }
+       else if (contentType?.includes("png")) {
+         return "png";
+       }
+       else {
+         // Fall back to URL extension
+         const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
+         if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
+           return urlExtension === "jpeg" ? "jpg" : urlExtension;
+         }
+         return "png"; // default
+       }
+     })();
+     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
      await fs.promises.writeFile(imagePath, buffer);
      imageRefs[key] = imagePath;
    }
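
The download path previously hard-coded `.png`; the new logic prefers the Content-Type header and falls back to the URL suffix. The same decision table, extracted as a standalone helper for clarity (a hypothetical refactoring, not code in the package):

```typescript
// Hypothetical standalone version of the extension detection above.
const detectExtension = (contentType: string | null, url: string): "jpg" | "png" => {
  if (contentType?.includes("jpeg") || contentType?.includes("jpg")) return "jpg";
  if (contentType?.includes("png")) return "png";
  // Fall back to the URL extension.
  const urlExtension = url.split(".").pop()?.toLowerCase();
  if (urlExtension === "jpg" || urlExtension === "jpeg") return "jpg";
  if (urlExtension === "png") return "png";
  return "png"; // default when nothing matches
};
```
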
@@ -266,14 +293,19 @@ const generateImages = async (context) => {
    Object.keys(injections).forEach((key) => {
      graph.injectValue(key, injections[key]);
    });
+   if (callbacks) {
+     callbacks.forEach((callback) => {
+       graph.registerCallback(callback);
+     });
+   }
    await graph.run();
  };
- export const images = async (context) => {
+ export const images = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "image", true);
-     await generateImages(context);
+     MulmoStudioContextMethods.setSessionState(context, "image", true);
+     await generateImages(context, callbacks);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "image", false);
+     MulmoStudioContextMethods.setSessionState(context, "image", false);
    }
  };