mulmocast 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +20 -3
  2. package/assets/templates/akira_comic.json +2 -2
  3. package/assets/templates/drslump_comic.json +2 -2
  4. package/assets/templates/ghibli_comic.json +2 -2
  5. package/assets/templates/ghost_comic.json +2 -2
  6. package/assets/templates/onepiece_comic.json +2 -2
  7. package/assets/templates/portrait_movie.json +28 -0
  8. package/assets/templates/realistic_movie.json +28 -0
  9. package/assets/templates/shorts.json +18 -0
  10. package/lib/actions/audio.d.ts +2 -1
  11. package/lib/actions/audio.js +8 -3
  12. package/lib/actions/captions.js +2 -2
  13. package/lib/actions/images.d.ts +2 -1
  14. package/lib/actions/images.js +68 -32
  15. package/lib/actions/movie.js +10 -6
  16. package/lib/actions/translate.d.ts +2 -1
  17. package/lib/actions/translate.js +8 -3
  18. package/lib/agents/combine_audio_files_agent.js +4 -0
  19. package/lib/agents/image_google_agent.d.ts +4 -1
  20. package/lib/agents/image_google_agent.js +3 -2
  21. package/lib/agents/image_openai_agent.d.ts +5 -3
  22. package/lib/agents/image_openai_agent.js +29 -4
  23. package/lib/agents/movie_google_agent.d.ts +24 -0
  24. package/lib/agents/movie_google_agent.js +122 -0
  25. package/lib/cli/bin.js +12 -0
  26. package/lib/index.d.ts +5 -0
  27. package/lib/index.js +5 -0
  28. package/lib/methods/mulmo_script.d.ts +0 -1
  29. package/lib/methods/mulmo_script.js +0 -5
  30. package/lib/methods/mulmo_studio.d.ts +1 -1
  31. package/lib/tools/create_mulmo_script_from_url.js +2 -2
  32. package/lib/tools/create_mulmo_script_interactively.js +2 -2
  33. package/lib/tools/story_to_script.js +2 -2
  34. package/lib/types/index.d.ts +1 -0
  35. package/lib/types/index.js +1 -0
  36. package/lib/types/schema.d.ts +155 -54
  37. package/lib/types/schema.js +14 -2
  38. package/lib/types/type.d.ts +3 -1
  39. package/lib/utils/file.d.ts +1 -0
  40. package/lib/utils/file.js +12 -8
  41. package/lib/utils/image_plugins/image.d.ts +1 -1
  42. package/lib/utils/image_plugins/movie.d.ts +1 -1
  43. package/lib/utils/preprocess.d.ts +9 -3
  44. package/lib/utils/utils.d.ts +1 -0
  45. package/lib/utils/utils.js +3 -0
  46. package/package.json +8 -8
  47. package/scripts/templates/movie_prompts_template.json +50 -0
  48. package/scripts/templates/shorts_template.json +52 -0
package/README.md CHANGED
@@ -90,11 +90,28 @@ Create a `.env` file in your project directory with the following API keys:
  ```bash
  OPENAI_API_KEY=your_openai_api_key
  ```
- ### Optional
+
+ #### (Optional) For the advanced image generation model
  ```bash
  DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
- GOOGLE_PROJECT_ID=your_google_project_id # for Google's image generation model
- NIJIVOICE_API_KEY=your_nijivoice_api_key # for Nijivoice's TTS model
+ ```
+
+ #### (Optional) For Google's image generation model
+ ```bash
+ GOOGLE_PROJECT_ID=your_google_project_id
+ ```
+
+ You may also need to take the following steps before running any commands:
+ 1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
+ 2. Login by `gcloud auth application-default login`
+
+ #### (Optional) For Nijivoice's TTS model
+ ```bash
+ NIJIVOICE_API_KEY=your_nijivoice_api_key
+ ```
+
+ #### (Optional) to access web in mulmo tool
+ ```bash
  BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
  ```

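All of these keys are read through `dotenv` when the CLI starts (note the `import "dotenv/config"` lines later in this diff). As a minimal sketch — not code from the package — a preflight check mirroring the required/optional split documented above could look like this:

```typescript
// Sketch only: key names come from the README above; the behavior is illustrative.
import "dotenv/config";

const optionalKeys = [
  "DEFAULT_OPENAI_IMAGE_MODEL", // advanced OpenAI image generation model
  "GOOGLE_PROJECT_ID",          // Google's image generation model
  "NIJIVOICE_API_KEY",          // Nijivoice's TTS model
  "BROWSERLESS_API_TOKEN",      // web access in the mulmo tool
];

if (!process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required");
}
for (const key of optionalKeys) {
  if (!process.env[key]) {
    console.warn(`${key} is not set; the corresponding optional feature is unavailable`);
  }
}
```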
package/assets/templates/akira_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "Akira style",
+ "description": "Template for Akira style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/drslump_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "Dr. Slump Style",
+ "description": "Template for Dr. Slump style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/ghibli_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "American Comic Strips",
- "description": "Template for Dilbert-style comic strips.",
+ "title": "Ghibli comic style",
+ "description": "Template for Ghibli-style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/ghost_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "Ghost in the shell style",
+ "description": "Template for Ghost in the shell style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/onepiece_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "One Piece style",
+ "description": "Template for One Piece style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/portrait_movie.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "title": "Photo realistic movie (portrait)",
+ "description": "Template for photo realistic movie in portrait mode.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "presentationStyle": {
+ "$mulmocast": {
+ "version": "1.0",
+ "credit": "closing"
+ },
+ "canvasSize": {
+ "width": 1024,
+ "height": 1536
+ },
+ "imageParams": {
+ "style": "<style>Photo realistic, cinematic.</style>",
+ "images": {
+ "presenter": {
+ "type": "image",
+ "source": {
+ "kind": "url",
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
+ }
+ }
+ }
+ }
+ },
+ "scriptName": "movie_prompts_template.json"
+ }
package/assets/templates/realistic_movie.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "title": "Photo realistic movie template",
+ "description": "Template for photo realistic movie.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "presentationStyle": {
+ "$mulmocast": {
+ "version": "1.0",
+ "credit": "closing"
+ },
+ "canvasSize": {
+ "width": 1536,
+ "height": 1024
+ },
+ "imageParams": {
+ "style": "<style>Photo realistic, cinematic.</style>",
+ "images": {
+ "presenter": {
+ "type": "image",
+ "source": {
+ "kind": "url",
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
+ }
+ }
+ }
+ }
+ },
+ "scriptName": "movie_prompts_template.json"
+ }
package/assets/templates/shorts.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "title": "Short movie template",
+ "description": "Template for Youtube shorts.",
+ "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+ "presentationStyle": {
+ "$mulmocast": {
+ "version": "1.0"
+ },
+ "canvasSize": {
+ "width": 720,
+ "height": 1280
+ },
+ "imageParams": {
+ "style": "<style>Photo realistic, cinematic.</style>"
+ }
+ },
+ "scriptName": "movie_prompts_template.json"
+ }
package/lib/actions/audio.d.ts CHANGED
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const audio: (context: MulmoStudioContext) => Promise<void>;
+ export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
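The `audio`, `images`, and `translate` actions all gain this optional `callbacks` parameter in 0.0.11; each forwards the array to `graph.registerCallback` before running its GraphAI graph. A hedged usage sketch — the payload of graphai's `CallbackFunction` is not shown in this diff, so it is treated as opaque, and the root re-export of `audio` is assumed:

```typescript
// Hypothetical caller code: observe graph progress via the new parameter.
import type { CallbackFunction } from "graphai";
import { audio, type MulmoStudioContext } from "mulmocast"; // assumed root re-exports

declare const context: MulmoStudioContext; // built elsewhere (CLI or tools)

const logProgress: CallbackFunction = (...args) => {
  console.log("graph event:", ...args); // payload shape is defined by graphai
};

await audio(context, [logProgress]);
```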
package/lib/actions/audio.js CHANGED
@@ -1,6 +1,6 @@
  import "dotenv/config";
  import { GraphAI } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
  import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
  const provider_to_agent = {
@@ -160,7 +160,7 @@ const agentFilters = [
  nodeIds: ["tts"],
  },
  ];
- export const audio = async (context) => {
+ export const audio = async (context, callbacks) => {
  try {
  MulmoStudioMethods.setSessionState(context.studio, "audio", true);
  const { studio, fileDirs, lang } = context;
@@ -187,6 +187,11 @@ export const audio = async (context) => {
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
  graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
  graph.injectValue("audioDirPath", audioDirPath);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  await graph.run();
  writingMessage(audioCombinedFilePath);
  }
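The recurring `const vanillaAgents = agents.default ?? agents;` line (repeated in `captions.js`, `images.js`, and `translate.js` below) replaces the old default import of `@graphai/vanilla`. It is a standard ESM/CJS interop shim: the agent map is picked up whether the module surfaces it as a default export or directly as its namespace. A minimal sketch of the pattern:

```typescript
// Interop shim: tolerate both module shapes of @graphai/vanilla.
import * as agents from "@graphai/vanilla";

// If the bundler/transpiler wraps the agents in a default export, unwrap it;
// otherwise the namespace object itself is the agent map.
const vanillaAgents = (agents as { default?: typeof agents }).default ?? agents;

// The resolved map is then spread into the GraphAI agent registry:
//   new GraphAI(graph_data, { ...vanillaAgents, ttsNijivoiceAgent, ... }, options);
```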
package/lib/actions/captions.js CHANGED
@@ -1,9 +1,9 @@
  import { GraphAI, GraphAILogger } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  const graph_data = {
  version: 0.5,
  nodes: {
package/lib/actions/images.d.ts CHANGED
@@ -1,2 +1,3 @@
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const images: (context: MulmoStudioContext) => Promise<void>;
+ export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -1,16 +1,17 @@
  import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import imageGoogleAgent from "../agents/image_google_agent.js";
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
+ import movieGoogleAgent from "../agents/movie_google_agent.js";
  import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { imagePlugins } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
@@ -26,8 +27,8 @@ const imagePreprocessAgent = async (namedInputs) => {
  const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
  const returnValue = {
- aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
  imageParams,
+ movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
  };
  if (beat.image) {
  const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
@@ -37,20 +38,24 @@ const imagePreprocessAgent = async (namedInputs) => {
  const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
  const path = await plugin.process(processorParams);
  // undefined prompt indicates that image generation is not needed
- return { path, ...returnValue };
+ return { imagePath: path, ...returnValue };
  }
  finally {
  MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
  }
  }
- const prompt = imagePrompt(beat, imageParams.style);
+ // images for "edit_image"
  const images = (() => {
  const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
  const sources = imageNames.map((name) => imageRefs[name]);
  return sources.filter((source) => source !== undefined);
  })();
- return { path: imagePath, prompt, ...returnValue, images };
+ if (beat.moviePrompt && !beat.imagePrompt) {
+ return { ...returnValue, images }; // no image prompt, only movie prompt
+ }
+ const prompt = imagePrompt(beat, imageParams.style);
+ return { imagePath, prompt, ...returnValue, images };
  };
  const graph_data = {
  version: 0.5,
@@ -95,18 +100,36 @@ const graph_data = {
  retry: 3,
  inputs: {
  prompt: ":preprocessor.prompt",
- file: ":preprocessor.path", // only for fileCacheAgentFilter
+ images: ":preprocessor.images",
+ file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
  text: ":preprocessor.prompt", // only for fileCacheAgentFilter
- force: ":context.force",
- studio: ":context.studio", // for cache
- index: ":__mapIndex", // for cache
- sessionType: "image", // for cache
+ force: ":context.force", // only for fileCacheAgentFilter
+ studio: ":context.studio", // for fileCacheAgentFilter
+ index: ":__mapIndex", // for fileCacheAgentFilter
+ sessionType: "image", // for fileCacheAgentFilter
  params: {
  model: ":preprocessor.imageParams.model",
- size: ":preprocessor.imageParams.size",
  moderation: ":preprocessor.imageParams.moderation",
- aspectRatio: ":preprocessor.aspectRatio",
- images: ":preprocessor.images",
+ canvasSize: ":context.studio.script.canvasSize",
+ },
+ },
+ defaultValue: {},
+ },
+ movieGenerator: {
+ if: ":preprocessor.movieFile",
+ agent: "movieGoogleAgent",
+ inputs: {
+ onComplete: ":imageGenerator", // to wait for imageGenerator to finish
+ prompt: ":beat.moviePrompt",
+ imagePath: ":preprocessor.imagePath",
+ file: ":preprocessor.movieFile",
+ studio: ":context.studio", // for cache
+ index: ":__mapIndex", // for cache
+ sessionType: "movie", // for cache
+ params: {
+ model: ":context.studio.script.movieParams.model",
+ duration: ":beat.duration",
+ canvasSize: ":context.studio.script.canvasSize",
  },
  },
  defaultValue: {},
@@ -114,11 +137,9 @@ const graph_data = {
  output: {
  agent: "copyAgent",
  inputs: {
- result: ":imageGenerator",
- image: ":preprocessor.path",
- },
- output: {
- imageFile: ".image",
+ onComplete: ":movieGenerator",
+ imageFile: ":preprocessor.imagePath",
+ movieFile: ":preprocessor.movieFile",
  },
  isResult: true,
  },
@@ -141,7 +162,7 @@ const graph_data = {
  context: ":context",
  },
  },
- writeOutout: {
+ writeOutput: {
  // console: { before: true },
  agent: "fileWriteAgent",
  inputs: {
@@ -152,14 +173,20 @@ const graph_data = {
  },
  };
  const googleAuth = async () => {
- const auth = new GoogleAuth({
- scopes: ["https://www.googleapis.com/auth/cloud-platform"],
- });
- const client = await auth.getClient();
- const accessToken = await client.getAccessToken();
- return accessToken.token;
+ try {
+ const auth = new GoogleAuth({
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+ });
+ const client = await auth.getClient();
+ const accessToken = await client.getAccessToken();
+ return accessToken.token;
+ }
+ catch (__error) {
+ GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
+ process.exit(1);
+ }
  };
- const generateImages = async (context) => {
+ const generateImages = async (context, callbacks) => {
  const { studio, fileDirs } = context;
  const { outDirPath, imageDirPath } = fileDirs;
  mkdir(`${imageDirPath}/${studio.filename}`);
@@ -167,7 +194,7 @@ const generateImages = async (context) => {
  {
  name: "fileCacheAgentFilter",
  agent: fileCacheAgentFilter,
- nodeIds: ["imageGenerator"],
+ nodeIds: ["imageGenerator", "movieGenerator"],
  },
  ];
  const options = {
@@ -175,7 +202,7 @@ const generateImages = async (context) => {
  };
  const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
  // We need to get google's auth token only if the google is the text2image provider.
- if (imageAgentInfo.provider === "google") {
+ if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
  GraphAILogger.log("google was specified as text2image engine");
  const token = await googleAuth();
  options.config = {
@@ -183,6 +210,10 @@ const generateImages = async (context) => {
  projectId: process.env.GOOGLE_PROJECT_ID,
  token,
  },
+ movieGoogleAgent: {
+ projectId: process.env.GOOGLE_PROJECT_ID,
+ token,
+ },
  };
  }
  if (imageAgentInfo.provider === "openai") {
@@ -219,16 +250,21 @@ const generateImages = async (context) => {
  imageDirPath,
  imageRefs,
  };
- const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
+ const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  await graph.run();
  };
- export const images = async (context) => {
+ export const images = async (context, callbacks) => {
  try {
  MulmoStudioMethods.setSessionState(context.studio, "image", true);
- await generateImages(context);
+ await generateImages(context, callbacks);
  }
  finally {
  MulmoStudioMethods.setSessionState(context.studio, "image", false);
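Two structural changes stand out in this file: the preprocessor now branches on `beat.moviePrompt` (a beat with only a movie prompt skips image generation entirely), and the new `movieGenerator` node runs after `imageGenerator`, serialized through its `onComplete` input. A condensed sketch of the per-beat planning, with names simplified relative to the diff:

```typescript
// Condensed sketch of the branching added to imagePreprocessAgent.
type Beat = { imagePrompt?: string; moviePrompt?: string };

function planBeat(beat: Beat, dir: string, filename: string, index: number) {
  const imagePath = `${dir}/${filename}/${index}.png`;
  // A .mov output is planned only when the beat carries a movie prompt.
  const movieFile = beat.moviePrompt ? `${dir}/${filename}/${index}.mov` : undefined;
  if (beat.moviePrompt && !beat.imagePrompt) {
    return { movieFile }; // movie-only beat: no image prompt, no image generation
  }
  return { imagePath, movieFile }; // image is generated; a movie may animate it
}
```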
package/lib/actions/movie.js CHANGED
@@ -61,8 +61,8 @@ const getOutputOption = (audioId) => {
  const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
  const start = performance.now();
  const ffmpegContext = FfmpegContextInit();
- if (studio.beats.some((beat) => !beat.imageFile)) {
- GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
+ if (studio.beats.some((beat) => !beat.imageFile && !beat.movieFile)) {
+ GraphAILogger.info("beat.imageFile or beat.movieFile is not set. Please run `yarn run images ${file}` ");
  return;
  }
  const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
@@ -71,11 +71,15 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  const filterComplexAudioIds = [];
  studio.beats.reduce((timestamp, studioBeat, index) => {
  const beat = studio.script.beats[index];
- if (!studioBeat.imageFile || !studioBeat.duration) {
- throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
+ const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
+ if (!sourceFile) {
+ throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
  }
- const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.imageFile);
- const mediaType = MulmoScriptMethods.getImageType(studio.script, beat);
+ if (!studioBeat.duration) {
+ throw new Error(`studioBeat.duration is not set: index=${index}`);
+ }
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
+ const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
  const extraPadding = (() => {
  // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
  if (index === 0) {
package/lib/actions/translate.d.ts CHANGED
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const translate: (context: MulmoStudioContext) => Promise<void>;
+ export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/translate.js CHANGED
@@ -1,13 +1,13 @@
  import "dotenv/config";
  import { GraphAI, assert } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  const translateGraph = {
  version: 0.5,
  nodes: {
@@ -208,7 +208,7 @@ const agentFilters = [
  ];
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
- export const translate = async (context) => {
+ export const translate = async (context, callbacks) => {
  try {
  MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
  const { studio, fileDirs } = context;
@@ -222,6 +222,11 @@ export const translate = async (context) => {
  graph.injectValue("targetLangs", targetLangs);
  graph.injectValue("outDirPath", outDirPath);
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  const results = await graph.run();
  writingMessage(outputStudioFilePath);
  if (results.mergeStudioResult) {
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -26,11 +26,15 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  const totalPadding = await (async () => {
  if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
  const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+ // NOTE: We respect the duration of the movie, only if the movie is specified as a madia source, NOT generated.
  const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
  if (movieDuration > audioDuration) {
  return padding + (movieDuration - audioDuration);
  }
  }
+ else if (beat.duration && beat.duration > audioDuration) {
+ return padding + (beat.duration - audioDuration);
+ }
  return padding;
  })();
  studioBeat.duration = audioDuration + totalPadding;
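The new `else if` adds a second way for a beat to outlast its narration: previously only a movie supplied as a media source could stretch the beat, now an explicit `beat.duration` can as well (generated movies still do not). A condensed sketch of the resulting rule:

```typescript
// Sketch of the padding rule after this change.
function beatPadding(padding: number, audioDuration: number,
                     sourceMovieDuration?: number, beatDuration?: number): number {
  if (sourceMovieDuration !== undefined) {
    // a movie given as a url/path media source may stretch the beat
    return sourceMovieDuration > audioDuration
      ? padding + (sourceMovieDuration - audioDuration)
      : padding;
  }
  if (beatDuration !== undefined && beatDuration > audioDuration) {
    return padding + (beatDuration - audioDuration); // explicit beat duration
  }
  return padding;
}
// studioBeat.duration = audioDuration + beatPadding(...)
```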
package/lib/agents/image_google_agent.d.ts CHANGED
@@ -5,7 +5,10 @@ export type ImageGoogleConfig = {
  };
  export declare const imageGoogleAgent: AgentFunction<{
  model: string;
- aspectRatio: string;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  }, {
  buffer: Buffer;
  }, {
package/lib/agents/image_google_agent.js CHANGED
@@ -1,4 +1,5 @@
  import { GraphAILogger } from "graphai";
+ import { getAspectRatio } from "./movie_google_agent.js";
  async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
  try {
@@ -50,9 +51,9 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  throw error;
  }
  }
- export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
+ export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
  const { prompt } = namedInputs;
- const aspectRatio = params.aspectRatio ?? "16:9";
+ const aspectRatio = getAspectRatio(params.canvasSize);
  const model = params.model ?? "imagen-3.0-fast-generate-001";
  //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
  const projectId = config?.projectId;
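`getAspectRatio` is now imported from the new `movie_google_agent.js`, whose 122-line body is not included in this excerpt. Judging from the old `params.aspectRatio ?? "16:9"` default and the canvas sizes used by the new templates, a plausible — purely hypothetical — shape of the helper is:

```typescript
// Hypothetical sketch: the real implementation lives in movie_google_agent.js
// and is not shown in this diff. This mirrors how the templates' canvas sizes
// (1536x1024, 1024x1536, 720x1280) would map to ratio strings.
export const getAspectRatio = (canvasSize: { width: number; height: number }): string => {
  if (canvasSize.width > canvasSize.height) return "16:9"; // landscape
  if (canvasSize.width < canvasSize.height) return "9:16"; // portrait
  return "1:1"; // square
};
```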
package/lib/agents/image_openai_agent.d.ts CHANGED
@@ -1,16 +1,18 @@
  import { AgentFunction, AgentFunctionInfo } from "graphai";
- type OpenAIImageSize = "1792x1024" | "auto" | "1024x1024" | "1536x1024" | "1024x1536" | "256x256";
  type OpenAIModeration = "low" | "auto";
  export declare const imageOpenaiAgent: AgentFunction<{
  apiKey: string;
  model: string;
- size: OpenAIImageSize | null | undefined;
  moderation: OpenAIModeration | null | undefined;
- images: string[] | null | undefined;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  }, {
  buffer: Buffer;
  }, {
  prompt: string;
+ images: string[] | null | undefined;
  }>;
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
  export default imageOpenaiAgentInfo;
package/lib/agents/image_openai_agent.js CHANGED
@@ -2,14 +2,39 @@ import fs from "fs";
  import OpenAI, { toFile } from "openai";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params }) => {
- const { prompt } = namedInputs;
- const { apiKey, model, size, moderation, images } = params;
+ const { prompt, images } = namedInputs;
+ const { apiKey, moderation, canvasSize } = params;
+ const model = params.model ?? "dall-e-3";
  const openai = new OpenAI({ apiKey });
+ const size = (() => {
+ if (model === "gpt-image-1") {
+ if (canvasSize.width > canvasSize.height) {
+ return "1536x1024";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "1024x1536";
+ }
+ else {
+ return "1024x1024";
+ }
+ }
+ else {
+ if (canvasSize.width > canvasSize.height) {
+ return "1792x1024";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "1024x1792";
+ }
+ else {
+ return "1024x1024";
+ }
+ }
+ })();
  const imageOptions = {
- model: model ?? "dall-e-3",
+ model,
  prompt,
  n: 1,
- size: size || model === "gpt-image-1" ? "1536x1024" : "1792x1024",
+ size,
  };
  if (model === "gpt-image-1") {
  imageOptions.moderation = moderation || "auto";
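The `size` IIFE above also fixes a precedence bug in the old one-liner: `size: size || model === "gpt-image-1" ? "1536x1024" : "1792x1024"` parses as `(size || model === "gpt-image-1") ? ... : ...`, so a caller-supplied size was never actually used. The new logic derives the size from canvas orientation per model; extracted here as a standalone sketch:

```typescript
// Sketch of the size selection above: gpt-image-1 and dall-e-3 accept
// different resolution sets, chosen by canvas orientation.
type CanvasSize = { width: number; height: number };

function openaiImageSize(model: string, canvas: CanvasSize): string {
  const landscape = canvas.width > canvas.height;
  const portrait = canvas.width < canvas.height;
  if (model === "gpt-image-1") {
    return landscape ? "1536x1024" : portrait ? "1024x1536" : "1024x1024";
  }
  // dall-e-3 (the default model) uses the 1792-based sizes
  return landscape ? "1792x1024" : portrait ? "1024x1792" : "1024x1024";
}

// e.g. the shorts template canvas (720x1280) yields "1024x1536" under gpt-image-1.
```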