mulmocast 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/assets/templates/akira_comic.json +28 -0
  2. package/assets/templates/children_book.json +13 -0
  3. package/assets/templates/comic_strips.json +14 -1
  4. package/assets/templates/drslump_comic.json +28 -0
  5. package/assets/templates/ghibli_comic.json +28 -0
  6. package/assets/templates/ghost_comic.json +35 -0
  7. package/assets/templates/onepiece_comic.json +28 -0
  8. package/assets/templates/portrait_movie.json +28 -0
  9. package/assets/templates/realistic_movie.json +28 -0
  10. package/assets/templates/sensei_and_taro.json +21 -0
  11. package/lib/actions/audio.js +1 -1
  12. package/lib/actions/captions.js +1 -1
  13. package/lib/actions/images.js +98 -13
  14. package/lib/actions/movie.d.ts +1 -1
  15. package/lib/actions/movie.js +13 -11
  16. package/lib/actions/pdf.js +6 -4
  17. package/lib/actions/translate.js +1 -1
  18. package/lib/agents/image_openai_agent.d.ts +1 -0
  19. package/lib/agents/image_openai_agent.js +16 -4
  20. package/lib/agents/movie_google_agent.d.ts +17 -0
  21. package/lib/agents/movie_google_agent.js +114 -0
  22. package/lib/cli/bin.js +19 -0
  23. package/lib/cli/helpers.js +2 -1
  24. package/lib/methods/mulmo_studio.d.ts +1 -1
  25. package/lib/tools/create_mulmo_script_from_url.js +1 -1
  26. package/lib/tools/create_mulmo_script_interactively.js +1 -1
  27. package/lib/tools/story_to_script.js +1 -1
  28. package/lib/types/schema.d.ts +1966 -322
  29. package/lib/types/schema.js +21 -3
  30. package/lib/types/type.d.ts +3 -1
  31. package/lib/utils/file.js +20 -9
  32. package/lib/utils/pdf.d.ts +1 -0
  33. package/lib/utils/pdf.js +5 -3
  34. package/lib/utils/preprocess.d.ts +57 -16
  35. package/lib/utils/utils.d.ts +1 -0
  36. package/lib/utils/utils.js +3 -0
  37. package/package.json +9 -9
  38. package/scripts/templates/children_book.json +0 -7
  39. package/scripts/templates/image_prompts_template.json +41 -0
  40. package/scripts/templates/movie_prompts_template.json +50 -0
  41. package/scripts/templates/sensei_and_taro.json +0 -11
  42. package/scripts/templates/text_only_template.json +35 -0
  43. package/assets/templates/ghibli_strips.json +0 -6
  44. package/scripts/templates/comic_strips.json +0 -30
  45. package/scripts/templates/ghibli_strips.json +0 -30
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Akira style",
3
+ "description": "Template for Akira style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>AKIRA aesthetic.</style>",
16
+ "images": {
17
+ "girl": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/akira_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -2,5 +2,18 @@
2
2
  "title": "Children Book",
3
3
  "description": "Template for children book.",
4
4
+ "systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must have an image prompt appropriate for the text.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
16
+ }
17
+ },
5
18
  "scriptName": "children_book.json"
6
19
  }
@@ -2,5 +2,18 @@
2
2
  "title": "American Comic Strips",
3
3
  "description": "Template for Dilbert-style comic strips.",
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
- "scriptName": "comic_strips.json"
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
16
+ }
17
+ },
18
+ "scriptName": "text_only_template.json"
6
19
  }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Dr. Slump Style",
3
+ "description": "Template for Dr. Slump style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
16
+ "images": {
17
+ "girl": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/slump_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Ghibli comic style",
3
+ "description": "Template for Ghibli-style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Ghibli style</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "title": "Ghost in the shell style",
3
+ "description": "Template for Ghost in the shell style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Ghost in the shell aesthetic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"
22
+ }
23
+ },
24
+ "optimus": {
25
+ "type": "image",
26
+ "source": {
27
+ "kind": "url",
28
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
29
+ }
30
+ }
31
+ }
32
+ }
33
+ },
34
+ "scriptName": "image_prompts_template.json"
35
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "One Piece style",
3
+ "description": "Template for One Piece style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>One Piece aesthetic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/onepiece_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Photo realistic movie (portrait)",
3
+ "description": "Template for photo realistic movie in portrait mode.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1024,
12
+ "height": 1536
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Photo realistic, cinematic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "movie_prompts_template.json"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Photo realistic movie template",
3
+ "description": "Template for photo realistic movie.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Photo realistic, cinematic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "movie_prompts_template.json"
28
+ }
@@ -2,5 +2,26 @@
2
2
  "title": "Student and Teacher",
3
3
  "description": "Interactive discussion between a student and teacher",
4
4
  "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
16
+ },
17
+ "speechParams": {
18
+ "provider": "nijivoice",
19
+ "speakers": {
20
+ "Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62" },
21
+ "Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
22
+ "Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
23
+ }
24
+ }
25
+ },
5
26
  "scriptName": "sensei_and_taro.json"
6
27
  }
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
12
12
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
13
13
  import { text2hash, localizedText } from "../utils/utils.js";
14
14
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
15
- const { default: __, ...vanillaAgents } = agents;
15
+ const vanillaAgents = agents.default ?? agents;
16
16
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
17
17
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
18
18
  const provider_to_agent = {
@@ -3,7 +3,7 @@ import * as agents from "@graphai/vanilla";
3
3
  import { getHTMLFile } from "../utils/file.js";
4
4
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
5
5
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
6
- const { default: __, ...vanillaAgents } = agents;
6
+ const vanillaAgents = agents.default ?? agents;
7
7
  const graph_data = {
8
8
  version: 0.5,
9
9
  nodes: {
@@ -1,4 +1,5 @@
1
1
  import dotenv from "dotenv";
2
+ import fs from "fs";
2
3
  import { GraphAI, GraphAILogger } from "graphai";
3
4
  import * as agents from "@graphai/vanilla";
4
5
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
@@ -6,10 +7,11 @@ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
6
7
  import { fileCacheAgentFilter } from "../utils/filters.js";
7
8
  import imageGoogleAgent from "../agents/image_google_agent.js";
8
9
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
9
- import { MulmoScriptMethods } from "../methods/index.js";
10
+ import movieGoogleAgent from "../agents/movie_google_agent.js";
11
+ import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
10
12
  import { imagePlugins } from "../utils/image_plugins/index.js";
11
13
  import { imagePrompt } from "../utils/prompt.js";
12
- const { default: __, ...vanillaAgents } = agents;
14
+ const vanillaAgents = agents.default ?? agents;
13
15
  dotenv.config();
14
16
  // const openai = new OpenAI();
15
17
  import { GoogleAuth } from "google-auth-library";
@@ -21,8 +23,12 @@ const htmlStyle = (script, beat) => {
21
23
  };
22
24
  };
23
25
  const imagePreprocessAgent = async (namedInputs) => {
24
- const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
26
+ const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
25
27
  const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
28
+ if (!imageParams.size) {
29
+ const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
30
+ imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
31
+ }
26
32
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
27
33
  const returnValue = {
28
34
  aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
@@ -44,7 +50,12 @@ const imagePreprocessAgent = async (namedInputs) => {
44
50
  }
45
51
  }
46
52
  const prompt = imagePrompt(beat, imageParams.style);
47
- return { path: imagePath, prompt, ...returnValue };
53
+ const images = (() => {
54
+ const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
55
+ const sources = imageNames.map((name) => imageRefs[name]);
56
+ return sources.filter((source) => source !== undefined);
57
+ })();
58
+ return { path: imagePath, prompt, ...returnValue, images };
48
59
  };
49
60
  const graph_data = {
50
61
  version: 0.5,
@@ -54,9 +65,16 @@ const graph_data = {
54
65
  imageDirPath: {},
55
66
  imageAgentInfo: {},
56
67
  outputStudioFilePath: {},
68
+ imageRefs: {},
57
69
  map: {
58
70
  agent: "mapAgent",
59
- inputs: { rows: ":context.studio.script.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
71
+ inputs: {
72
+ rows: ":context.studio.script.beats",
73
+ context: ":context",
74
+ imageAgentInfo: ":imageAgentInfo",
75
+ imageDirPath: ":imageDirPath",
76
+ imageRefs: ":imageRefs",
77
+ },
60
78
  isResult: true,
61
79
  params: {
62
80
  rowKey: "beat",
@@ -73,6 +91,7 @@ const graph_data = {
73
91
  suffix: "p",
74
92
  imageDirPath: ":imageDirPath",
75
93
  imageAgentInfo: ":imageAgentInfo",
94
+ imageRefs: ":imageRefs",
76
95
  },
77
96
  },
78
97
  imageGenerator: {
@@ -92,6 +111,43 @@ const graph_data = {
92
111
  size: ":preprocessor.imageParams.size",
93
112
  moderation: ":preprocessor.imageParams.moderation",
94
113
  aspectRatio: ":preprocessor.aspectRatio",
114
+ images: ":preprocessor.images",
115
+ },
116
+ },
117
+ defaultValue: {},
118
+ },
119
+ prepareMovie: {
120
+ agent: (namedInputs) => {
121
+ const { beat, imageDirPath, index, context } = namedInputs;
122
+ if (beat.moviePrompt) {
123
+ const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
124
+ return { movieFile };
125
+ }
126
+ return {};
127
+ },
128
+ inputs: {
129
+ result: ":imageGenerator", // to wait for imageGenerator to finish
130
+ imagePath: ":preprocessor.path",
131
+ beat: ":beat",
132
+ imageDirPath: ":imageDirPath",
133
+ index: ":__mapIndex",
134
+ context: ":context",
135
+ },
136
+ },
137
+ movieGenerator: {
138
+ if: ":prepareMovie.movieFile",
139
+ agent: "movieGoogleAgent",
140
+ inputs: {
141
+ prompt: ":beat.moviePrompt",
142
+ imagePath: ":preprocessor.path",
143
+ file: ":prepareMovie.movieFile",
144
+ studio: ":context.studio", // for cache
145
+ index: ":__mapIndex", // for cache
146
+ sessionType: "movie", // for cache
147
+ params: {
148
+ model: ":context.studio.script.movieParams.model",
149
+ aspectRatio: ":preprocessor.aspectRatio",
150
+ duration: ":beat.duration",
95
151
  },
96
152
  },
97
153
  defaultValue: {},
@@ -99,11 +155,9 @@ const graph_data = {
99
155
  output: {
100
156
  agent: "copyAgent",
101
157
  inputs: {
102
- result: ":imageGenerator",
103
- image: ":preprocessor.path",
104
- },
105
- output: {
106
- imageFile: ".image",
158
+ onComplete: ":movieGenerator",
159
+ imageFile: ":preprocessor.path",
160
+ movieFile: ":prepareMovie.movieFile",
107
161
  },
108
162
  isResult: true,
109
163
  },
@@ -152,7 +206,7 @@ const generateImages = async (context) => {
152
206
  {
153
207
  name: "fileCacheAgentFilter",
154
208
  agent: fileCacheAgentFilter,
155
- nodeIds: ["imageGenerator"],
209
+ nodeIds: ["imageGenerator", "movieGenerator"],
156
210
  },
157
211
  ];
158
212
  const options = {
@@ -160,7 +214,7 @@ const generateImages = async (context) => {
160
214
  };
161
215
  const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
162
216
  // We need to get google's auth token only if the google is the text2image provider.
163
- if (imageAgentInfo.provider === "google") {
217
+ if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
164
218
  GraphAILogger.log("google was specified as text2image engine");
165
219
  const token = await googleAuth();
166
220
  options.config = {
@@ -168,16 +222,47 @@ const generateImages = async (context) => {
168
222
  projectId: process.env.GOOGLE_PROJECT_ID,
169
223
  token,
170
224
  },
225
+ movieGoogleAgent: {
226
+ projectId: process.env.GOOGLE_PROJECT_ID,
227
+ token,
228
+ },
171
229
  };
172
230
  }
231
+ if (imageAgentInfo.provider === "openai") {
232
+ // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
233
+ // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
234
+ // gpt-image-1:3,000,000 TPM、150 images per minute
235
+ graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
236
+ }
237
+ const imageRefs = {};
238
+ const images = studio.script.imageParams?.images;
239
+ if (images) {
240
+ await Promise.all(Object.keys(images).map(async (key) => {
241
+ const image = images[key];
242
+ if (image.source.kind === "path") {
243
+ imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
244
+ }
245
+ else if (image.source.kind === "url") {
246
+ const response = await fetch(image.source.url);
247
+ if (!response.ok) {
248
+ throw new Error(`Failed to download image: ${image.source.url}`);
249
+ }
250
+ const buffer = Buffer.from(await response.arrayBuffer());
251
+ const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
252
+ await fs.promises.writeFile(imagePath, buffer);
253
+ imageRefs[key] = imagePath;
254
+ }
255
+ }));
256
+ }
173
257
  GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
174
258
  const injections = {
175
259
  context,
176
260
  imageAgentInfo,
177
261
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
178
262
  imageDirPath,
263
+ imageRefs,
179
264
  };
180
- const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
265
+ const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
181
266
  Object.keys(injections).forEach((key) => {
182
267
  graph.injectValue(key, injections[key]);
183
268
  });
@@ -3,7 +3,7 @@ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType
3
3
  videoId: string;
4
4
  videoPart: string;
5
5
  };
6
- export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
6
+ export declare const getAudioPart: (inputIndex: number, duration: number, delay: number, mixAudio: number) => {
7
7
  audioId: string;
8
8
  audioPart: string;
9
9
  };
@@ -26,13 +26,14 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
26
26
  videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
27
27
  };
28
28
  };
29
- export const getAudioPart = (inputIndex, duration, delay) => {
29
+ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
30
30
  const audioId = `a${inputIndex}`;
31
31
  return {
32
32
  audioId,
33
33
  audioPart: `[${inputIndex}:a]` +
34
34
  `atrim=duration=${duration},` + // Trim to beat duration
35
35
  `adelay=${delay * 1000}|${delay * 1000},` +
36
+ `volume=${mixAudio},` + // 👈 add this line
36
37
  `aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
37
38
  `[${audioId}]`,
38
39
  };
@@ -68,12 +69,13 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
68
69
  // Add each image input
69
70
  const filterComplexVideoIds = [];
70
71
  const filterComplexAudioIds = [];
71
- studio.beats.reduce((timestamp, beat, index) => {
72
- if (!beat.imageFile || !beat.duration) {
73
- throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
72
+ studio.beats.reduce((timestamp, studioBeat, index) => {
73
+ const beat = studio.script.beats[index];
74
+ if (!studioBeat.imageFile || !studioBeat.duration) {
75
+ throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
74
76
  }
75
- const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
76
- const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
77
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.movieFile ?? studioBeat.imageFile);
78
+ const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
77
79
  const extraPadding = (() => {
78
80
  // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
79
81
  if (index === 0) {
@@ -84,11 +86,11 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
84
86
  }
85
87
  return 0;
86
88
  })();
87
- const duration = beat.duration + extraPadding;
89
+ const duration = studioBeat.duration + extraPadding;
88
90
  const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
89
91
  ffmpegContext.filterComplex.push(videoPart);
90
- if (caption && beat.captionFile) {
91
- const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
92
+ if (caption && studioBeat.captionFile) {
93
+ const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
92
94
  const compositeVideoId = `c${index}`;
93
95
  ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
94
96
  filterComplexVideoIds.push(compositeVideoId);
@@ -96,8 +98,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
96
98
  else {
97
99
  filterComplexVideoIds.push(videoId);
98
100
  }
99
- if (mediaType === "movie") {
100
- const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
101
+ if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
102
+ const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
101
103
  filterComplexAudioIds.push(audioId);
102
104
  ffmpegContext.filterComplex.push(audioPart);
103
105
  }
@@ -102,19 +102,20 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
102
102
  const pos = (() => {
103
103
  if (isLandscapeImage) {
104
104
  const cellHeight = pageHeight / imagesPerPage - offset;
105
- const { drawWidth, drawHeight } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
106
- const x = offset;
105
+ const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
106
+ const x = offset + (containerWidth - drawWidth) / 2;
107
107
  const y = pageHeight - (i + 1) * cellHeight + (cellHeight - drawHeight) * handoutImageRatio;
108
108
  return {
109
109
  x,
110
110
  y,
111
111
  width: drawWidth,
112
112
  height: drawHeight,
113
+ containerWidth,
113
114
  };
114
115
  }
115
116
  else {
116
117
  const cellWidth = pageWidth / imagesPerPage;
117
- const { drawWidth, drawHeight } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
118
+ const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
118
119
  const x = pageWidth - (imagesPerPage - i) * cellWidth + (cellWidth - drawWidth) * handoutImageRatio;
119
120
  const y = pageHeight - drawHeight - offset;
120
121
  return {
@@ -122,6 +123,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
122
123
  y,
123
124
  width: drawWidth,
124
125
  height: drawHeight,
126
+ containerWidth,
125
127
  };
126
128
  }
127
129
  })();
@@ -136,7 +138,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
136
138
  for (const [index, line] of lines.entries()) {
137
139
  page.drawText(line, {
138
140
  ...pos,
139
- x: pos.x + pos.width + textMargin,
141
+ x: offset + pos.containerWidth + textMargin,
140
142
  y: pos.y + pos.height - fontSize - (fontSize + 2) * index,
141
143
  size: fontSize,
142
144
  font,
@@ -7,7 +7,7 @@ import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/strin
7
7
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
8
8
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
9
9
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
10
- const { default: __, ...vanillaAgents } = agents;
10
+ const vanillaAgents = agents.default ?? agents;
11
11
  const translateGraph = {
12
12
  version: 0.5,
13
13
  nodes: {
@@ -6,6 +6,7 @@ export declare const imageOpenaiAgent: AgentFunction<{
6
6
  model: string;
7
7
  size: OpenAIImageSize | null | undefined;
8
8
  moderation: OpenAIModeration | null | undefined;
9
+ images: string[] | null | undefined;
9
10
  }, {
10
11
  buffer: Buffer;
11
12
  }, {