mulmocast 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +1 -3
  2. package/assets/templates/ghibli_shorts.json +34 -0
  3. package/assets/templates/trailer.json +25 -0
  4. package/lib/actions/audio.js +29 -16
  5. package/lib/actions/captions.js +5 -5
  6. package/lib/actions/images.js +51 -12
  7. package/lib/actions/movie.js +46 -13
  8. package/lib/actions/pdf.js +3 -3
  9. package/lib/actions/translate.js +15 -15
  10. package/lib/agents/image_openai_agent.js +6 -3
  11. package/lib/agents/index.d.ts +2 -1
  12. package/lib/agents/index.js +2 -1
  13. package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
  14. package/lib/agents/tts_elevenlabs_agent.js +60 -0
  15. package/lib/agents/tts_google_agent.js +1 -1
  16. package/lib/agents/tts_nijivoice_agent.js +3 -2
  17. package/lib/agents/tts_openai_agent.js +1 -1
  18. package/lib/cli/commands/audio/handler.js +4 -1
  19. package/lib/cli/commands/image/handler.js +4 -1
  20. package/lib/cli/commands/movie/handler.js +4 -1
  21. package/lib/cli/commands/pdf/handler.js +4 -1
  22. package/lib/cli/commands/translate/handler.js +4 -1
  23. package/lib/cli/helpers.d.ts +3 -3
  24. package/lib/cli/helpers.js +38 -20
  25. package/lib/methods/mulmo_media_source.d.ts +1 -0
  26. package/lib/methods/mulmo_media_source.js +12 -0
  27. package/lib/methods/mulmo_script.d.ts +1 -0
  28. package/lib/methods/mulmo_script.js +9 -0
  29. package/lib/methods/mulmo_studio_context.d.ts +5 -0
  30. package/lib/methods/mulmo_studio_context.js +23 -0
  31. package/lib/types/schema.d.ts +1498 -242
  32. package/lib/types/schema.js +25 -34
  33. package/lib/types/type.d.ts +4 -1
  34. package/lib/utils/file.d.ts +4 -15
  35. package/lib/utils/file.js +2 -13
  36. package/lib/utils/filters.js +4 -4
  37. package/lib/utils/image_plugins/beat.d.ts +4 -0
  38. package/lib/utils/image_plugins/beat.js +7 -0
  39. package/lib/utils/image_plugins/index.d.ts +2 -1
  40. package/lib/utils/image_plugins/index.js +2 -1
  41. package/lib/utils/image_plugins/source.js +2 -2
  42. package/lib/utils/preprocess.d.ts +24 -20
  43. package/lib/utils/preprocess.js +4 -0
  44. package/package.json +1 -1
  45. package/scripts/templates/movie_prompts_no_text_template.json +50 -0
package/README.md CHANGED
@@ -101,9 +101,7 @@ DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
  GOOGLE_PROJECT_ID=your_google_project_id
  ```
 
- You may also need to take the following steps before running any commands:
- 1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
- 2. Login by `gcloud auth application-default login`
+ See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
  #### (Optional) For Nijivoice's TTS model
  ```bash
package/assets/templates/ghibli_shorts.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "title": "Ghibli comic style",
+   "description": "Template for Ghibli-style comic presentation.",
+   "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Presenter": { "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62", "speechOptions": { "speed": 1.5 } }
+       }
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/trailer.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "title": "Movie Trailer template",
+   "description": "Template for A Movie Trailer.",
+   "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0"
+     },
+     "canvasSize": {
+       "width": 1280,
+       "height": 720
+     },
+     "imageParams": {
+       "style": "<style>Photo realistic, cinematic.</style>"
+     },
+     "audioParams": {
+       "padding": 0.0,
+       "introPadding": 0.0,
+       "closingPadding": 0.0,
+       "outroPadding": 2.5,
+       "bgm": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/bgms/trailer_dramatic.mp3" }
+     }
+   },
+   "scriptName": "movie_prompts_no_text_template.json"
+ }
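The `bgm` entry above is a media source of kind `url`. Below is a minimal sketch of how such a source can be resolved to a playable location; it is not the package's implementation (lib/methods/mulmo_media_source.js changes in this release, but its body is not part of this diff), and the non-URL branch is deliberately left open.

```js
// Minimal sketch only; the real resolver lives in lib/methods/mulmo_media_source.js,
// whose body is not shown in this diff.
const resolveMediaSource = (source) => {
  if (!source) {
    return undefined;
  }
  if (source.kind === "url") {
    return source.url; // e.g. the trailer_dramatic.mp3 URL above
  }
  return undefined; // other kinds (e.g. local paths) would be handled here
};

console.log(resolveMediaSource({ kind: "url", url: "https://example.com/bgm.mp3" }));
```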
package/lib/actions/audio.js CHANGED
@@ -6,12 +6,14 @@ import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
  import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
+ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
+ import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+ import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
  const vanillaAgents = agents.default ?? agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -19,10 +21,11 @@ const provider_to_agent = {
    nijivoice: "ttsNijivoiceAgent",
    openai: "ttsOpenaiAgent",
    google: "ttsGoogleAgent",
+   elevenlabs: "ttsElevenlabsAgent",
  };
  const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    if (beat.audio?.type === "audio") {
-     const path = resolveMediaSource(beat.audio.source, context);
+     const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
      if (path) {
        return path;
      }
@@ -34,18 +37,21 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
  };
  const preprocessor = (namedInputs) => {
-   const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
+   const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
    const { lang } = context;
-   const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
+   const speaker = context.studio.script.speechParams.speakers[beat.speaker];
+   const voiceId = speaker.voiceId;
    const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
    const text = localizedText(beat, multiLingual, lang);
-   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
-   const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
+   // Use speaker-specific provider if available, otherwise fall back to script-level provider
+   const provider = speaker.provider ?? context.studio.script.speechParams.provider;
+   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
+   const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
    const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
    studioBeat.audioFile = audioPath;
    const needsTTS = !beat.audio && audioPath !== undefined;
    return {
-     ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
+     ttsAgent: provider_to_agent[provider],
      studioBeat,
      voiceId,
      speechOptions,
@@ -62,7 +68,6 @@ const graph_tts = {
      beat: ":beat",
      studioBeat: ":studioBeat",
      multiLingual: ":multiLingual",
-     index: ":__mapIndex",
      context: ":context",
      audioDirPath: ":audioDirPath",
    },
@@ -74,7 +79,7 @@ const graph_tts = {
      text: ":preprocessor.text",
      file: ":preprocessor.audioPath",
      force: ":context.force",
-     studio: ":context.studio", // for cache
+     mulmoContext: ":context", // for cache
      index: ":__mapIndex", // for cache
      sessionType: "audio", // for cache
      params: {
@@ -96,6 +101,7 @@ const graph_data = {
    outputStudioFilePath: {},
    audioDirPath: {},
    audioSegmentDirPath: {},
+   musicFile: {},
    map: {
      agent: "mapAgent",
      inputs: {
@@ -130,14 +136,14 @@ const graph_data = {
    },
    addBGM: {
      agent: "addBGMAgent",
-     params: {
-       musicFile: process.env.PATH_BGM ?? defaultBGMPath,
-     },
      inputs: {
        wait: ":combineFiles",
        voiceFile: ":audioCombinedFilePath",
        outputFile: ":audioArtifactFilePath",
        script: ":context.studio.script",
+       params: {
+         musicFile: ":musicFile",
+       },
      },
      isResult: true,
    },
@@ -162,7 +168,7 @@ const agentFilters = [
  ];
  export const audio = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+     MulmoStudioContextMethods.setSessionState(context, "audio", true);
      const { studio, fileDirs, lang } = context;
      const { outDirPath, audioDirPath } = fileDirs;
      const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -171,13 +177,19 @@ export const audio = async (context, callbacks) => {
      const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
      mkdir(outDirPath);
      mkdir(audioSegmentDirPath);
-     graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+     const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
+       const provider = speaker.provider ?? studio.script.speechParams.provider;
+       return provider === "nijivoice" || provider === "elevenlabs";
+     });
+     graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
      const graph = new GraphAI(graph_data, {
        ...vanillaAgents,
        fileWriteAgent,
        ttsOpenaiAgent,
        ttsNijivoiceAgent,
        ttsGoogleAgent,
+       ttsElevenlabsAgent,
        addBGMAgent,
        combineAudioFilesAgent,
      }, { agentFilters });
@@ -187,6 +199,7 @@ export const audio = async (context, callbacks) => {
      graph.injectValue("outputStudioFilePath", outputStudioFilePath);
      graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
      graph.injectValue("audioDirPath", audioDirPath);
+     graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath);
      if (callbacks) {
        callbacks.forEach((callback) => {
          graph.registerCallback(callback);
@@ -196,6 +209,6 @@ export const audio = async (context, callbacks) => {
      writingMessage(audioCombinedFilePath);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+     MulmoStudioContextMethods.setSessionState(context, "audio", false);
    }
  };
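Two behavioral changes stand out in audio.js: each speaker may now carry its own TTS `provider` (falling back to the script-level one), and the presence of any rate-limited provider (nijivoice or elevenlabs) forces serial generation. A condensed, standalone sketch of that selection logic, with illustrative speaker data rather than the package's types:

```js
// Condensed sketch of the provider-selection logic in the diff above.
// The speechParams data here is illustrative, not from the package.
const provider_to_agent = {
  nijivoice: "ttsNijivoiceAgent",
  openai: "ttsOpenaiAgent",
  google: "ttsGoogleAgent",
  elevenlabs: "ttsElevenlabsAgent",
};

const speechParams = {
  provider: "openai", // script-level default
  speakers: {
    Presenter: { voiceId: "shimmer" }, // inherits "openai"
    Guest: { provider: "elevenlabs", voiceId: "abc123" }, // speaker-level override
  },
};

// Each beat resolves its agent from the speaker, falling back to the script default.
const agentFor = (speakerName) => {
  const speaker = speechParams.speakers[speakerName];
  return provider_to_agent[speaker.provider ?? speechParams.provider];
};

// Rate-limited providers force serial execution, exactly as in the diff above.
const limited = Object.values(speechParams.speakers).some((speaker) => {
  const provider = speaker.provider ?? speechParams.provider;
  return provider === "nijivoice" || provider === "elevenlabs";
});
const concurrency = limited ? 1 : 8;

console.log(agentFor("Guest"), concurrency); // "ttsElevenlabsAgent" 1
```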
package/lib/actions/captions.js CHANGED
@@ -2,7 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const graph_data = {
    version: 0.5,
@@ -22,7 +22,7 @@ const graph_data = {
      agent: async (namedInputs) => {
        const { beat, context, index } = namedInputs;
        try {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
          const { fileDirs } = namedInputs.context;
          const { caption } = context;
          const { imageDirPath } = fileDirs;
@@ -47,7 +47,7 @@ const graph_data = {
          return imagePath;
        }
        finally {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, false);
        }
      },
      inputs: {
@@ -64,12 +64,12 @@ const graph_data = {
  };
  export const captions = async (context) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+     MulmoStudioContextMethods.setSessionState(context, "caption", true);
      const graph = new GraphAI(graph_data, { ...vanillaAgents });
      graph.injectValue("context", context);
      await graph.run();
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+     MulmoStudioContextMethods.setSessionState(context, "caption", false);
    }
  };
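The recurring pattern in this and the following files is that session state is now set through `MulmoStudioContextMethods` on the whole context rather than `MulmoStudioMethods` on the studio, always inside try/finally so the flag is cleared even on error. A minimal sketch of the pattern with a stubbed `setSessionState` (the real one in lib/methods/mulmo_studio_context.js is not shown in this diff and presumably does more):

```js
// Minimal sketch of the try/finally session-state pattern; the bookkeeping is stubbed.
const sessionState = new Set();
const MulmoStudioContextMethods = {
  setSessionState: (context, session, active) => {
    active ? sessionState.add(session) : sessionState.delete(session);
  },
};

const withSession = async (context, session, fn) => {
  MulmoStudioContextMethods.setSessionState(context, session, true);
  try {
    return await fn();
  } finally {
    // finally guarantees the "in progress" flag is cleared even if fn throws
    MulmoStudioContextMethods.setSessionState(context, session, false);
  }
};

// Usage mirroring captions():
// await withSession(context, "caption", () => graph.run());
```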
package/lib/actions/images.js CHANGED
@@ -15,7 +15,6 @@ const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const htmlStyle = (script, beat) => {
    return {
      canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -34,14 +33,14 @@ const imagePreprocessAgent = async (namedInputs) => {
    const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
    if (plugin) {
      try {
-       MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
        const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
        const path = await plugin.process(processorParams);
        // undefined prompt indicates that image generation is not needed
        return { imagePath: path, ...returnValue };
      }
      finally {
-       MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
      }
    }
  }
@@ -104,7 +103,7 @@ const graph_data = {
      file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
      text: ":preprocessor.prompt", // only for fileCacheAgentFilter
      force: ":context.force", // only for fileCacheAgentFilter
-     studio: ":context.studio", // for fileCacheAgentFilter
+     mulmoContext: ":context", // for fileCacheAgentFilter
      index: ":__mapIndex", // for fileCacheAgentFilter
      sessionType: "image", // for fileCacheAgentFilter
      params: {
@@ -134,13 +133,20 @@ const graph_data = {
      },
      defaultValue: {},
    },
-   output: {
+   onComplete: {
      agent: "copyAgent",
      inputs: {
-       onComplete: ":movieGenerator",
+       onComplete: ":movieGenerator", // to wait for movieGenerator to finish
        imageFile: ":preprocessor.imagePath",
        movieFile: ":preprocessor.movieFile",
      },
+   },
+   output: {
+     agent: "copyAgent",
+     inputs: {
+       imageFile: ":onComplete.imageFile",
+       movieFile: ":onComplete.movieFile",
+     },
      isResult: true,
    },
  },
@@ -150,11 +156,26 @@
    agent: (namedInputs) => {
      const { array, context } = namedInputs;
      const { studio } = context;
+     const beatIndexMap = {};
      array.forEach((update, index) => {
        const beat = studio.beats[index];
        studio.beats[index] = { ...beat, ...update };
+       const id = studio.script.beats[index].id;
+       if (id) {
+         beatIndexMap[id] = index;
+       }
+     });
+     studio.beats.forEach((studioBeat, index) => {
+       const beat = studio.script.beats[index];
+       if (beat.image?.type === "beat") {
+         if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
+           studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+         }
+         else if (index > 0) {
+           studioBeat.imageFile = studio.beats[index - 1].imageFile;
+         }
+       }
      });
-     // console.log(namedInputs);
      return { studio };
    },
    inputs: {
@@ -181,9 +202,9 @@ const googleAuth = async () => {
      const accessToken = await client.getAccessToken();
      return accessToken.token;
    }
-   catch (__error) {
+   catch (error) {
      GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
-     process.exit(1);
+     throw error;
    }
  };
  const generateImages = async (context, callbacks) => {
@@ -236,7 +257,25 @@ const generateImages = async (context, callbacks) => {
        throw new Error(`Failed to download image: ${image.source.url}`);
      }
      const buffer = Buffer.from(await response.arrayBuffer());
-     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
+     // Detect file extension from Content-Type header or URL
+     const extension = (() => {
+       const contentType = response.headers.get("content-type");
+       if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
+         return "jpg";
+       }
+       else if (contentType?.includes("png")) {
+         return "png";
+       }
+       else {
+         // Fall back to URL extension
+         const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
+         if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
+           return urlExtension === "jpeg" ? "jpg" : urlExtension;
+         }
+         return "png"; // default
+       }
+     })();
+     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
      await fs.promises.writeFile(imagePath, buffer);
      imageRefs[key] = imagePath;
    }
@@ -263,10 +302,10 @@ const generateImages = async (context, callbacks) => {
  };
  export const images = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "image", true);
+     MulmoStudioContextMethods.setSessionState(context, "image", true);
      await generateImages(context, callbacks);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "image", false);
+     MulmoStudioContextMethods.setSessionState(context, "image", false);
    }
  };
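The download path above now derives the file extension from the `Content-Type` header first and the URL second, instead of hard-coding `.png`. The same logic, extracted into a standalone function for illustration:

```js
// Standalone restatement of the Content-Type-first extension detection added above.
const detectExtension = (contentType, url) => {
  if (contentType?.includes("jpeg") || contentType?.includes("jpg")) return "jpg";
  if (contentType?.includes("png")) return "png";
  // Fall back to the URL's extension, normalizing "jpeg" to "jpg".
  const urlExtension = url.split(".").pop()?.toLowerCase();
  if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
    return urlExtension === "jpeg" ? "jpg" : urlExtension;
  }
  return "png"; // default when neither header nor URL is conclusive
};

console.log(detectExtension("image/jpeg", "https://example.com/a"));   // "jpg"
console.log(detectExtension(null, "https://example.com/b.PNG"));       // "png"
console.log(detectExtension("application/octet-stream", "c.webp"));    // "png" (fallback)
```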
package/lib/actions/movie.js CHANGED
@@ -1,8 +1,9 @@
- import { GraphAILogger } from "graphai";
+ import { GraphAILogger, assert } from "graphai";
+ import { mulmoTransitionSchema } from "../types/index.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
@@ -38,10 +39,10 @@ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
      `[${audioId}]`,
    };
  };
- const getOutputOption = (audioId) => {
+ const getOutputOption = (audioId, videoId) => {
    return [
      "-preset medium", // Changed from veryfast to medium for better compression
-     "-map [v]", // Map the video stream
+     `-map [${videoId}]`, // Map the video stream
      `-map ${audioId}`, // Map the audio stream
      `-c:v ${videoCodec}`, // Set video codec
      ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
@@ -61,14 +62,17 @@ const getOutputOption = (audioId) => {
  const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
    const start = performance.now();
    const ffmpegContext = FfmpegContextInit();
-   if (studio.beats.some((beat) => !beat.imageFile && !beat.movieFile)) {
-     GraphAILogger.info("beat.imageFile or beat.movieFile is not set. Please run `yarn run images ${file}` ");
-     return;
+   const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+   if (missingIndex !== -1) {
+     GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
+     return false;
    }
    const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
    // Add each image input
    const filterComplexVideoIds = [];
    const filterComplexAudioIds = [];
+   const transitionVideoIds = [];
+   const beatTimestamps = [];
    studio.beats.reduce((timestamp, studioBeat, index) => {
      const beat = studio.script.beats[index];
      const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
@@ -102,16 +106,43 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
      else {
        filterComplexVideoIds.push(videoId);
      }
+     if (studio.script.movieParams?.transition && index < studio.beats.length - 1) {
+       const sourceId = filterComplexVideoIds.pop();
+       ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
+       filterComplexVideoIds.push(`${sourceId}_0`);
+       transitionVideoIds.push(`${sourceId}_1`);
+     }
      if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
        const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
        filterComplexAudioIds.push(audioId);
        ffmpegContext.filterComplex.push(audioPart);
      }
+     beatTimestamps.push(timestamp);
      return timestamp + duration;
    }, 0);
+   assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
+   assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
    // console.log("*** images", images.audioIds);
    // Concatenate the trimmed images
-   ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
+   const concatVideoId = "concat_video";
+   ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+   // Add tranditions if needed
+   const mixedVideoId = (() => {
+     if (studio.script.movieParams?.transition && transitionVideoIds.length > 1) {
+       const transition = mulmoTransitionSchema.parse(studio.script.movieParams.transition);
+       return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
+         const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
+         const processedVideoId = `${transitionVideoId}_f`;
+         // TODO: This mechanism does not work for video beats yet. It works only with image beats.
+         // If we can to add other transition types than fade, we need to add them here.
+         ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`);
+         const outputId = `${transitionVideoId}_o`;
+         ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+         return outputId;
+       }, concatVideoId);
+     }
+     return concatVideoId;
+   })();
    const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
    const artifactAudioId = `${audioIndex}:a`;
    const ffmpegContextAudioId = (() => {
@@ -125,23 +156,25 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
      }
      return artifactAudioId;
    })();
-   await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
+   await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
    const end = performance.now();
    GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
    GraphAILogger.info(studio.script.title);
    GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+   return true;
  };
  export const movie = async (context) => {
-   MulmoStudioMethods.setSessionState(context.studio, "video", true);
+   MulmoStudioContextMethods.setSessionState(context, "video", true);
    try {
      const { studio, fileDirs, caption } = context;
      const { outDirPath } = fileDirs;
      const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
      const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
-     await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
-     writingMessage(outputVideoPath);
+     if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+       writingMessage(outputVideoPath);
+     }
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "video", false);
+     MulmoStudioContextMethods.setSessionState(context, "video", false);
    }
  };
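The transition support above works by splitting each non-final beat's video stream, concatenating one copy as before, and overlaying the other copy with an alpha fade at the beat boundary. A sketch of the filtergraph strings this produces for two beats, using hypothetical stream labels (`v0`, `v1`); it only prints the filter expressions and does not invoke ffmpeg:

```js
// Illustrative only: the shape of the fade-transition filtergraph assembled above.
const transition = { type: "fade", duration: 0.5 }; // shape assumed from mulmoTransitionSchema
const beatTimestamps = [0, 4.0]; // beat 1 starts 4s in
const filterComplex = [];

// Split beat 0's stream: one copy feeds concat, the other becomes the fade overlay.
filterComplex.push("[v0]split=2[v0_0][v0_1]");
filterComplex.push("[v0_0][v1]concat=n=2:v=1:a=0[concat_video]");

// Start the fade slightly early (0.05s, per the anti-flicker comment in the diff).
const start = beatTimestamps[1] - 0.05;
filterComplex.push(`[v0_1]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${start}/TB[v0_1_f]`);
filterComplex.push(`[concat_video][v0_1_f]overlay=enable='between(t,${start},${start + transition.duration})'[v0_1_o]`);

console.log(filterComplex.join(";\n"));
```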
package/lib/actions/pdf.js CHANGED
@@ -6,7 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const imagesPerPage = 4;
  const offset = 10;
  const handoutImageRatio = 0.5;
@@ -224,10 +224,10 @@ const generatePdf = async (context, pdfMode, pdfSize) => {
  };
  export const pdf = async (context, pdfMode, pdfSize) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "pdf", true);
+     MulmoStudioContextMethods.setSessionState(context, "pdf", true);
      await generatePdf(context, pdfMode, pdfSize);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "pdf", false);
+     MulmoStudioContextMethods.setSessionState(context, "pdf", false);
    }
  };
package/lib/actions/translate.js CHANGED
@@ -6,19 +6,19 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const translateGraph = {
    version: 0.5,
    nodes: {
-     studio: {},
+     context: {},
      defaultLang: {},
      outDirPath: {},
      outputStudioFilePath: {},
      lang: {
        agent: "stringUpdateTextAgent",
        inputs: {
-         newText: ":studio.script.lang",
+         newText: ":context.studio.script.lang",
          oldText: ":defaultLang",
        },
      },
@@ -27,15 +27,15 @@ const translateGraph = {
      isResult: true,
      agent: "mergeObjectAgent",
      inputs: {
-       items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
+       items: [":context.studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
      },
    },
    beatsMap: {
      agent: "mapAgent",
      inputs: {
        targetLangs: ":targetLangs",
-       studio: ":studio",
-       rows: ":studio.script.beats",
+       context: ":context",
+       rows: ":context.studio.script.beats",
        lang: ":lang",
      },
      params: {
@@ -52,7 +52,7 @@ const translateGraph = {
      },
      inputs: {
        index: ":__mapIndex",
-       rows: ":studio.multiLingual",
+       rows: ":context.studio.multiLingual",
      },
    },
    preprocessMultiLingual: {
@@ -62,7 +62,7 @@ const translateGraph = {
        multiLingual: ":multiLingual",
        rows: ":targetLangs",
        lang: ":lang.text",
-       studio: ":studio",
+       context: ":context",
        beatIndex: ":__mapIndex",
      },
      params: {
@@ -79,7 +79,7 @@ const translateGraph = {
        multiLingual: ":multiLingual", // for cache
        lang: ":lang", // for cache
        beatIndex: ":beatIndex", // for cache
-       studio: ":studio", // for cache
+       mulmoContext: ":context", // for cache
        system: translateSystemPrompt,
        prompt: translatePrompts,
      },
@@ -175,7 +175,7 @@ const translateGraph = {
  };
  const localizedTextCacheAgentFilter = async (context, next) => {
    const { namedInputs } = context;
-   const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
+   const { mulmoContext, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
    if (!beat.text) {
      return { text: "" };
    }
@@ -192,11 +192,11 @@ const localizedTextCacheAgentFilter = async (context, next) => {
      return { text: beat.text };
    }
    try {
-     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
+     MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
      return await next(context);
    }
    finally {
-     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
+     MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, false);
    }
  };
  const agentFilters = [
@@ -210,14 +210,14 @@ const defaultLang = "en";
  const targetLangs = ["ja", "en"];
  export const translate = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
+     MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
      const { studio, fileDirs } = context;
      const { outDirPath } = fileDirs;
      const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
      mkdir(outDirPath);
      assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
      const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
-     graph.injectValue("studio", studio);
+     graph.injectValue("context", context);
      graph.injectValue("defaultLang", defaultLang);
      graph.injectValue("targetLangs", targetLangs);
      graph.injectValue("outDirPath", outDirPath);
@@ -234,6 +234,6 @@ export const translate = async (context, callbacks) => {
    }
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
+     MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
    }
  };
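localizedTextCacheAgentFilter above is a GraphAI agent filter: a function receiving the agent context and a `next` continuation, free to do work before and after delegating. A minimal sketch of the same shape; the `{ name, agent }` registration format is assumed from the diff's `agentFilters` array, not verified against GraphAI's docs:

```js
// Minimal agent filter in the style of localizedTextCacheAgentFilter:
// wrap next() so per-beat work is bracketed by before/after bookkeeping.
const loggingAgentFilter = async (context, next) => {
  const { beatIndex } = context.namedInputs ?? {};
  console.log(`beat ${beatIndex}: start`);
  try {
    return await next(context); // run the wrapped agent (or the next filter)
  } finally {
    console.log(`beat ${beatIndex}: done`);
  }
};

// Registration, mirroring the diff's construction call (element shape assumed):
// new GraphAI(translateGraph, { ...agents }, { agentFilters: [{ name: "log", agent: loggingAgentFilter }] });
```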