mulmocast 0.0.18 → 0.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/README.md +44 -36
  2. package/assets/templates/ghibli_image_only.json +28 -0
  3. package/lib/actions/audio.js +13 -11
  4. package/lib/actions/captions.js +2 -3
  5. package/lib/actions/images.d.ts +5 -0
  6. package/lib/actions/images.js +41 -17
  7. package/lib/actions/movie.js +17 -3
  8. package/lib/actions/translate.js +3 -3
  9. package/lib/agents/add_bgm_agent.js +2 -2
  10. package/lib/agents/combine_audio_files_agent.js +96 -53
  11. package/lib/agents/image_openai_agent.js +2 -1
  12. package/lib/agents/validate_schema_agent.d.ts +1 -1
  13. package/lib/agents/validate_schema_agent.js +3 -3
  14. package/lib/cli/helpers.js +6 -1
  15. package/lib/index.browser.d.ts +2 -0
  16. package/lib/index.browser.js +3 -0
  17. package/lib/index.d.ts +1 -0
  18. package/lib/index.js +1 -0
  19. package/lib/methods/mulmo_presentation_style.js +2 -1
  20. package/lib/types/schema.d.ts +197 -129
  21. package/lib/types/schema.js +9 -5
  22. package/lib/utils/const.d.ts +1 -0
  23. package/lib/utils/const.js +1 -0
  24. package/lib/utils/file.d.ts +1 -0
  25. package/lib/utils/file.js +4 -0
  26. package/lib/utils/image_plugins/beat.d.ts +1 -0
  27. package/lib/utils/image_plugins/beat.js +3 -0
  28. package/lib/utils/image_plugins/chart.d.ts +1 -0
  29. package/lib/utils/image_plugins/chart.js +2 -0
  30. package/lib/utils/image_plugins/html_tailwind.d.ts +1 -0
  31. package/lib/utils/image_plugins/html_tailwind.js +2 -0
  32. package/lib/utils/image_plugins/image.d.ts +1 -0
  33. package/lib/utils/image_plugins/image.js +1 -0
  34. package/lib/utils/image_plugins/index.d.ts +3 -3
  35. package/lib/utils/image_plugins/index.js +6 -3
  36. package/lib/utils/image_plugins/markdown.d.ts +1 -0
  37. package/lib/utils/image_plugins/markdown.js +2 -0
  38. package/lib/utils/image_plugins/mermaid.d.ts +1 -0
  39. package/lib/utils/image_plugins/mermaid.js +3 -1
  40. package/lib/utils/image_plugins/movie.d.ts +1 -0
  41. package/lib/utils/image_plugins/movie.js +1 -0
  42. package/lib/utils/image_plugins/source.js +1 -1
  43. package/lib/utils/image_plugins/text_slide.d.ts +1 -0
  44. package/lib/utils/image_plugins/text_slide.js +2 -0
  45. package/lib/utils/image_plugins/utils.d.ts +2 -0
  46. package/lib/utils/image_plugins/utils.js +3 -0
  47. package/lib/utils/preprocess.d.ts +3 -1
  48. package/package.json +13 -3
  49. package/scripts/templates/image_prompt_only_template.json +27 -0
  50. package/lib/agents/image_mock_agent.d.ts +0 -4
  51. package/lib/agents/image_mock_agent.js +0 -18
  52. package/lib/agents/mulmo_prompts_agent.d.ts +0 -7
  53. package/lib/agents/mulmo_prompts_agent.js +0 -37
  54. package/lib/agents/prompts_data.d.ts +0 -15
  55. package/lib/agents/prompts_data.js +0 -16
  56. package/lib/agents/validate_mulmo_script_agent.d.ts +0 -17
  57. package/lib/agents/validate_mulmo_script_agent.js +0 -34
  58. package/lib/cli/args.d.ts +0 -15
  59. package/lib/cli/args.js +0 -62
  60. package/lib/cli/cli.d.ts +0 -17
  61. package/lib/cli/cli.js +0 -117
  62. package/lib/cli/run.d.ts +0 -2
  63. package/lib/cli/run.js +0 -3
  64. package/lib/cli/tool-args.d.ts +0 -16
  65. package/lib/cli/tool-args.js +0 -64
  66. package/lib/cli/tool-cli.d.ts +0 -2
  67. package/lib/cli/tool-cli.js +0 -69
  68. package/lib/methods/mulmo_script.d.ts +0 -11
  69. package/lib/methods/mulmo_script.js +0 -59
  70. package/lib/methods/mulmo_studio.d.ts +0 -8
  71. package/lib/methods/mulmo_studio.js +0 -24
  72. package/lib/tools/prompt.d.ts +0 -1
  73. package/lib/tools/prompt.js +0 -18
  74. package/lib/utils/image_plugins/tailwind.d.ts +0 -3
  75. package/lib/utils/image_plugins/tailwind.js +0 -18
  76. package/lib/utils/pdf.d.ts +0 -9
  77. package/lib/utils/pdf.js +0 -77
package/README.md CHANGED
@@ -288,14 +288,16 @@ Positionals:
  file Mulmo Script File [string] [required]

  Options:
- --version Show version number [boolean]
- -v, --verbose verbose log [boolean] [required] [default: false]
- -h, --help Show help [boolean]
- -o, --outdir output dir [string]
- -b, --basedir base dir [string]
- -l, --lang target language [string] [choices: "en", "ja"]
- -f, --force Force regenerate [boolean] [default: false]
- -a, --audiodir Audio output directory [string]
+ --version Show version number [boolean]
+ -v, --verbose verbose log [boolean] [required] [default: false]
+ -h, --help Show help [boolean]
+ -o, --outdir output dir [string]
+ -b, --basedir base dir [string]
+ -l, --lang target language [string] [choices: "en", "ja"]
+ -f, --force Force regenerate [boolean] [default: false]
+ --dryRun Dry run [boolean] [default: false]
+ -p, --presentationStyle Presentation Style [string]
+ -a, --audiodir Audio output directory [string]
  ```

  ```
@@ -307,14 +309,16 @@ Positionals:
  file Mulmo Script File [string] [required]

  Options:
- --version Show version number [boolean]
- -v, --verbose verbose log [boolean] [required] [default: false]
- -h, --help Show help [boolean]
- -o, --outdir output dir [string]
- -b, --basedir base dir [string]
- -l, --lang target language [string] [choices: "en", "ja"]
- -f, --force Force regenerate [boolean] [default: false]
- -i, --imagedir Image output directory [string]
+ --version Show version number [boolean]
+ -v, --verbose verbose log [boolean] [required] [default: false]
+ -h, --help Show help [boolean]
+ -o, --outdir output dir [string]
+ -b, --basedir base dir [string]
+ -l, --lang target language [string] [choices: "en", "ja"]
+ -f, --force Force regenerate [boolean] [default: false]
+ --dryRun Dry run [boolean] [default: false]
+ -p, --presentationStyle Presentation Style [string]
+ -i, --imagedir Image output directory [string]
  ```

  ```
@@ -326,16 +330,18 @@ Positionals:
  file Mulmo Script File [string] [required]

  Options:
- --version Show version number [boolean]
- -v, --verbose verbose log [boolean] [required] [default: false]
- -h, --help Show help [boolean]
- -o, --outdir output dir [string]
- -b, --basedir base dir [string]
- -l, --lang target language [string] [choices: "en", "ja"]
- -f, --force Force regenerate [boolean] [default: false]
- -a, --audiodir Audio output directory [string]
- -i, --imagedir Image output directory [string]
- -c, --caption Video captions [string] [choices: "en", "ja"]
+ --version Show version number [boolean]
+ -v, --verbose verbose log [boolean] [required] [default: false]
+ -h, --help Show help [boolean]
+ -o, --outdir output dir [string]
+ -b, --basedir base dir [string]
+ -l, --lang target language [string] [choices: "en", "ja"]
+ -f, --force Force regenerate [boolean] [default: false]
+ --dryRun Dry run [boolean] [default: false]
+ -p, --presentationStyle Presentation Style [string]
+ -a, --audiodir Audio output directory [string]
+ -i, --imagedir Image output directory [string]
+ -c, --caption Video captions [string] [choices: "en", "ja"]
  ```

  ```
@@ -347,17 +353,19 @@ Positionals:
  file Mulmo Script File [string] [required]

  Options:
- --version Show version number [boolean]
- -v, --verbose verbose log [boolean] [required] [default: false]
- -h, --help Show help [boolean]
- -o, --outdir output dir [string]
- -b, --basedir base dir [string]
- -l, --lang target language [string] [choices: "en", "ja"]
- -f, --force Force regenerate [boolean] [default: false]
- -i, --imagedir Image output directory [string]
- --pdf_mode PDF mode
+ --version Show version number [boolean]
+ -v, --verbose verbose log [boolean] [required] [default: false]
+ -h, --help Show help [boolean]
+ -o, --outdir output dir [string]
+ -b, --basedir base dir [string]
+ -l, --lang target language [string] [choices: "en", "ja"]
+ -f, --force Force regenerate [boolean] [default: false]
+ --dryRun Dry run [boolean] [default: false]
+ -p, --presentationStyle Presentation Style [string]
+ -i, --imagedir Image output directory [string]
+ --pdf_mode PDF mode
  [string] [choices: "slide", "talk", "handout"] [default: "slide"]
- --pdf_size PDF paper size (default: letter)
+ --pdf_size PDF paper size (default: letter)
  [choices: "letter", "a4"] [default: "letter"]
  ```

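Two options are new in every subcommand's help above: `--dryRun` and `-p, --presentationStyle`. Assuming the package's `mulmo` CLI entry point, an invocation such as `mulmo movie script.json --dryRun -p ghibli_style.json` (file names hypothetical) would exercise both: `dryRun` swaps the media generators for `mediaMockAgent` (see `lib/actions/images.js` below), and `-p` supplies an external presentation style such as the new Ghibli template that follows.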
package/assets/templates/ghibli_image_only.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "title": "Ghibli comic image-only",
+   "description": "Template for Ghibli-style image-only comic presentation.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompt_only_template.json"
+ }
package/lib/actions/audio.js CHANGED
@@ -178,9 +178,9 @@ const agentFilters = [
    },
  ];
  export const audioFilePath = (context) => {
-   const { studio, fileDirs } = context;
-   const { outDirPath } = fileDirs;
-   return getAudioArtifactFilePath(outDirPath, studio.filename);
+   const fileName = MulmoStudioContextMethods.getFileName(context);
+   const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+   return getAudioArtifactFilePath(outDirPath, fileName);
  };
  const getConcurrency = (context) => {
    // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
@@ -203,9 +203,10 @@ const audioAgents = {
  export const generateBeatAudio = async (index, context, callbacks) => {
    try {
      MulmoStudioContextMethods.setSessionState(context, "audio", true);
-     const { studio, fileDirs } = context;
-     const { outDirPath, audioDirPath } = fileDirs;
-     const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+     const fileName = MulmoStudioContextMethods.getFileName(context);
+     const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+     const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+     const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
      mkdir(outDirPath);
      mkdir(audioSegmentDirPath);
      const taskManager = new TaskManager(getConcurrency(context));
@@ -229,12 +230,13 @@ export const generateBeatAudio = async (index, context, callbacks) => {
  export const audio = async (context, callbacks) => {
    try {
      MulmoStudioContextMethods.setSessionState(context, "audio", true);
-     const { studio, fileDirs, lang } = context;
-     const { outDirPath, audioDirPath } = fileDirs;
+     const fileName = MulmoStudioContextMethods.getFileName(context);
+     const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+     const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
      const audioArtifactFilePath = audioFilePath(context);
-     const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
-     const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
-     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+     const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
+     const audioCombinedFilePath = getAudioFilePath(audioDirPath, fileName, fileName, context.lang);
+     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
      mkdir(outDirPath);
      mkdir(audioSegmentDirPath);
      const taskManager = new TaskManager(getConcurrency(context));
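This hunk, like the ones in `translate.js` and `images.js` below, replaces direct destructuring of `context.studio`/`context.fileDirs` with `MulmoStudioContextMethods` accessors. Judging only from the before/after pairs in this diff, the accessors behave roughly like the following sketch (hypothetical; the real methods may add indirection, e.g. for the new `presentationStyle` input):

```typescript
// Hypothetical shapes inferred from the before/after pairs in this diff; not the real module.
type MulmoStudioContext = {
  studio: { filename: string };
  fileDirs: { outDirPath: string; audioDirPath: string };
};
const MulmoStudioContextMethods = {
  getFileName: (context: MulmoStudioContext) => context.studio.filename,
  getOutDirPath: (context: MulmoStudioContext) => context.fileDirs.outDirPath,
  getAudioDirPath: (context: MulmoStudioContext) => context.fileDirs.audioDirPath,
};
```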
package/lib/actions/captions.js CHANGED
@@ -1,6 +1,6 @@
  import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
- import { getHTMLFile } from "../utils/file.js";
+ import { getHTMLFile, getCaptionImagePath } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
  import { MulmoStudioContextMethods, MulmoPresentationStyleMethods } from "../methods/index.js";
  const vanillaAgents = agents.default ?? agents;
@@ -23,10 +23,9 @@ const graph_data = {
      const { beat, context, index } = namedInputs;
      try {
        MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
-       const imageDirPath = MulmoStudioContextMethods.getImageDirPath(context);
        const caption = MulmoStudioContextMethods.getCaption(context);
        const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
-       const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
+       const imagePath = getCaptionImagePath(context, index);
        const template = getHTMLFile("caption");
        const text = (() => {
          const multiLingual = context.multiLingual;
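`getCaptionImagePath` is new in `lib/utils/file.js` (the `utils/file.*` +1/+4 entries in the file list). A plausible reconstruction from the inline expression it replaces, with the parameters flattened for the sketch:

```typescript
// Hypothetical reconstruction from the removed inline code; the real helper takes (context, index)
// and presumably derives imageDirPath via MulmoStudioContextMethods.getImageDirPath(context).
const getCaptionImagePath = (imageDirPath: string, filename: string, index: number): string =>
  `${imageDirPath}/${filename}/${index}_caption.png`;
// e.g. getCaptionImagePath("out/images", "my_script", 2) => "out/images/my_script/2_caption.png"
```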
package/lib/actions/images.d.ts CHANGED
@@ -85,6 +85,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
    referenceImage: string;
    prompt: string;
  }>;
+ export declare const imagePluginAgent: (namedInputs: {
+   context: MulmoStudioContext;
+   beat: MulmoBeat;
+   index: number;
+ }) => Promise<void>;
  export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
  export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
  export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -8,8 +8,9 @@ import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getRefe
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
- import { imagePlugins } from "../utils/image_plugins/index.js";
+ import { findImagePlugin } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
+ import { defaultOpenAIImageModel } from "../utils/const.js";
  const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
@@ -30,19 +31,13 @@ export const imagePreprocessAgent = async (namedInputs) => {
      movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
    };
    if (beat.image) {
-     const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
-     if (plugin) {
-       try {
-         MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
-         const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
-         const path = await plugin.process(processorParams);
-         // undefined prompt indicates that image generation is not needed
-         return { imagePath: path, referenceImage: path, ...returnValue };
-       }
-       finally {
-         MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
-       }
+     const plugin = findImagePlugin(beat?.image?.type);
+     if (!plugin) {
+       throw new Error(`invalid beat image type: ${beat.image}`);
      }
+     const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
+     // undefined prompt indicates that image generation is not needed
+     return { imagePath: path, referenceImage: path, ...returnValue };
    }
    // images for "edit_image"
    const images = (() => {
@@ -56,6 +51,24 @@ export const imagePreprocessAgent = async (namedInputs) => {
    const prompt = imagePrompt(beat, imageParams.style);
    return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
  };
+ export const imagePluginAgent = async (namedInputs) => {
+   const { context, beat, index } = namedInputs;
+   const imagePath = getBeatPngImagePath(context, index);
+   const plugin = findImagePlugin(beat?.image?.type);
+   if (!plugin) {
+     throw new Error(`invalid beat image type: ${beat.image}`);
+   }
+   try {
+     MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
+     const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
+     await plugin.process(processorParams);
+     MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
+   }
+   catch (error) {
+     MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
+     throw error;
+   }
+ };
  const beat_graph_data = {
    version: 0.5,
    concurrency: 4,
@@ -76,6 +89,17 @@ const beat_graph_data = {
          imageRefs: ":imageRefs",
        },
      },
+     imagePlugin: {
+       if: ":beat.image",
+       defaultValue: {},
+       agent: imagePluginAgent,
+       inputs: {
+         context: ":context",
+         beat: ":beat",
+         index: ":__mapIndex",
+         onComplete: ":preprocessor",
+       },
+     },
      imageGenerator: {
        if: ":preprocessor.prompt",
        agent: ":imageAgentInfo.agent",
@@ -101,7 +125,7 @@ const beat_graph_data = {
        if: ":preprocessor.movieFile",
        agent: ":movieAgentInfo.agent",
        inputs: {
-         onComplete: ":imageGenerator", // to wait for imageGenerator to finish
+         onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
          prompt: ":beat.moviePrompt",
          imagePath: ":preprocessor.referenceImage",
          file: ":preprocessor.movieFile",
@@ -303,7 +327,7 @@ export const getImageRefs = async (context) => {
    return imageRefs;
  };
  const prepareGenerateImages = async (context) => {
-   const { studio } = context;
+   const fileName = MulmoStudioContextMethods.getFileName(context);
    const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
    const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
    mkdir(imageProjectDirPath);
@@ -316,7 +340,7 @@ const prepareGenerateImages = async (context) => {
      movieAgentInfo: {
        agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
      },
-     outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
+     outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
      imageRefs,
    };
    return injections;
@@ -327,7 +351,7 @@ const getConcurrency = (context) => {
    // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
    // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
    // gpt-image-1:3,000,000 TPM、150 images per minute
-   return imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
+   return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
  }
  return 4;
};
package/lib/actions/movie.js CHANGED
@@ -140,10 +140,24 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, capt
    return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
      const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
      const processedVideoId = `${transitionVideoId}_f`;
-     // If we can to add other transition types than fade, we need to add them here.
-     ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`);
+     let transitionFilter;
+     if (transition.type === "fade") {
+       transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
+     }
+     else if (transition.type === "slideout_left") {
+       transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
+     }
+     else {
+       throw new Error(`Unknown transition type: ${transition.type}`);
+     }
+     ffmpegContext.filterComplex.push(transitionFilter);
      const outputId = `${transitionVideoId}_o`;
-     ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+     if (transition.type === "fade") {
+       ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+     }
+     else if (transition.type === "slideout_left") {
+       ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+     }
      return outputId;
    }, concatVideoId);
  }
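To make the new `slideout_left` math concrete (values hypothetical): with `transitionStartTime = 10` and `transition.duration = 0.5`, the overlay's x expression becomes `-(t-10)*W/0.5`, so x ramps from 0 at t=10 to -W at t=10.5, sliding the outgoing frame fully off the left edge while the next beat shows through underneath. A standalone sketch of the strings being assembled:

```typescript
// Standalone sketch of the slideout_left filter pair built above (not the module itself).
const slideoutLeftFilters = (videoId: string, acc: string, start: number, duration: number): string[] => {
  const processed = `${videoId}_f`;
  const output = `${videoId}_o`;
  return [
    // Keep the outgoing frame available past its own timeline, in an alpha-capable pixel format.
    `[${videoId}]format=yuva420p,setpts=PTS-STARTPTS+${start}/TB[${processed}]`,
    // x: 0 at t=start, -W at t=start+duration; enabled only during the transition window.
    `[${acc}][${processed}]overlay=x='-(t-${start})*W/${duration}':y=0:enable='between(t,${start},${start + duration})'[${output}]`,
  ];
};
// slideoutLeftFilters("tv1", "concat0", 10, 0.5)[1] contains overlay=x='-(t-10)*W/0.5':y=0:...
```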
package/lib/actions/translate.js CHANGED
@@ -211,9 +211,9 @@ const targetLangs = ["ja", "en"];
  export const translate = async (context, callbacks) => {
    try {
      MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
-     const { studio, fileDirs } = context;
-     const { outDirPath } = fileDirs;
-     const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, studio.filename);
+     const fileName = MulmoStudioContextMethods.getFileName(context);
+     const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+     const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
      mkdir(outDirPath);
      assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
      const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
package/lib/agents/add_bgm_agent.js CHANGED
@@ -11,8 +11,8 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
    const ffmpegContext = FfmpegContextInit();
    const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile);
    const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
-   ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=0.2[music]`);
-   ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=2, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
+   ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
+   ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
    ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
    ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
    ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
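The mix levels that were hard-coded (0.2 for music, 2 for voice) are now read from `context.presentationStyle.audioParams`, so a script or presentation style can override them. A hypothetical snippet (field names taken from the diff; the schema's actual defaults are unverified, though the old literals are the obvious candidates):

```json
{
  "audioParams": {
    "bgmVolume": 0.2,
    "audioVolume": 2
  }
}
```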
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -18,87 +18,130 @@ const getPadding = (context, beat, index) => {
    const isClosingGap = index === context.studio.beats.length - 2;
    return isClosingGap ? context.presentationStyle.audioParams.closingPadding : context.presentationStyle.audioParams.padding;
  };
- const getTotalPadding = (padding, movieDuration, audioDuration, duration, canSpillover = false) => {
+ const getTotalPadding = (padding, movieDuration, audioDuration, duration) => {
    if (movieDuration > 0) {
      return padding + (movieDuration - audioDuration);
    }
    else if (duration && duration > audioDuration) {
      return padding + (duration - audioDuration);
    }
-   else if (canSpillover && duration && audioDuration > duration) {
-     return duration - audioDuration; // negative value to indicate that there is a spill over.
-   }
    return padding;
  };
- const combineAudioFilesAgent = async ({ namedInputs, }) => {
-   const { context, combinedFileName } = namedInputs;
-   const ffmpegContext = FfmpegContextInit();
-   const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
-   // We cannot reuse longSilentId. We need to explicitly split it for each beat.
-   const silentIds = context.studio.beats.map((_, index) => `[ls_${index}]`);
-   ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
-   // First, get the audio durations of all beats, taking advantage of multi-threading capability of ffmpeg.
-   const mediaDurations = await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
+ const getMediaDurations = (context) => {
+   return Promise.all(context.studio.beats.map(async (studioBeat, index) => {
      const beat = context.studio.script.beats[index];
      const movieDuration = await getMovieDulation(beat);
      const audioDuration = studioBeat.audioFile ? await ffmpegGetMediaDuration(studioBeat.audioFile) : 0;
      return {
        movieDuration,
        audioDuration,
+       hasMadia: movieDuration + audioDuration > 0,
+       silenceDuration: 0,
      };
    }));
- const inputIds = [];
+ };
+ const getGroupBeatDurations = (context, group, audioDuration) => {
+   const specifiedSum = group
+     .map((idx) => context.studio.script.beats[idx].duration)
+     .filter((d) => d !== undefined)
+     .reduce((a, b) => a + b, 0);
+   const unspecified = group.filter((idx) => context.studio.script.beats[idx].duration === undefined);
+   const minTotal = 1.0 * unspecified.length;
+   const rest = Math.max(audioDuration - specifiedSum, minTotal);
+   const durationForUnspecified = rest / (unspecified.length || 1);
+   const durations = group.map((idx) => {
+     const duration = context.studio.script.beats[idx].duration;
+     if (duration === undefined) {
+       return durationForUnspecified;
+     }
+     return duration;
+   });
+   return durations;
+ };
+ const combineAudioFilesAgent = async ({ namedInputs, }) => {
+   const { context, combinedFileName } = namedInputs;
+   const ffmpegContext = FfmpegContextInit();
+   // First, get the audio durations of all beats, taking advantage of multi-threading capability of ffmpeg.
+   const mediaDurations = await getMediaDurations(context);
    const beatDurations = [];
-   context.studio.beats.reduce((spillover, studioBeat, index) => {
-     const beat = context.studio.script.beats[index];
+   context.studio.script.beats.forEach((beat, index) => {
      const { audioDuration, movieDuration } = mediaDurations[index];
-     const paddingId = `[padding_${index}]`;
-     const canSpillover = index < context.studio.beats.length - 1 && mediaDurations[index + 1].movieDuration + mediaDurations[index + 1].audioDuration === 0;
-     if (studioBeat.audioFile) {
-       const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
-       // padding is the amount of audio padding specified in the script.
-       const padding = getPadding(context, beat, index);
-       // totalPadding is the amount of audio padding to be added to the audio file.
-       const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration, canSpillover);
-       beatDurations.push(audioDuration + totalPadding);
-       if (totalPadding > 0) {
-         const silentId = silentIds.pop();
-         ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${totalPadding}${paddingId}`);
-         inputIds.push(audioId, paddingId);
+     // Check if the current beat has media and the next beat does not have media.
+     if (audioDuration > 0) {
+       // Check if the current beat has spilled over audio.
+       const group = [index];
+       for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMadia; i++) {
+         group.push(i);
+       }
+       if (group.length > 1) {
+         const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
+         // Yes, the current beat has spilled over audio.
+         const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
+         if (beatsTotalDuration > audioDuration) {
+           group.reduce((remaining, idx, iGroup) => {
+             if (remaining >= groupBeatsDurations[iGroup]) {
+               return remaining - groupBeatsDurations[iGroup];
+             }
+             mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
+             return 0;
+           }, audioDuration);
+         }
+         else {
+           // Last beat gets the rest of the audio.
+           groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+         }
+         beatDurations.push(...groupBeatsDurations);
        }
        else {
-         inputIds.push(audioId);
-         if (totalPadding < 0) {
-           return -totalPadding;
+         // No spilled over audio.
+         assert(beatDurations.length === index, "beatDurations.length !== index");
+         // padding is the amount of audio padding specified in the script.
+         const padding = getPadding(context, beat, index);
+         // totalPadding is the amount of audio padding to be added to the audio file.
+         const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+         const beatDuration = audioDuration + totalPadding;
+         beatDurations.push(beatDuration);
+         if (totalPadding > 0) {
+           mediaDurations[index].silenceDuration = totalPadding;
          }
        }
      }
-     else {
-       // NOTE: We come here when the text is empty and no audio property is specified.
-       const beatDuration = (() => {
-         const duration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
-         if (!canSpillover && duration < spillover) {
-           return spillover; // We need to consume the spillover here.
-         }
-         return duration;
-       })();
+     else if (movieDuration > 0) {
+       // This beat has only a movie, not audio.
+       assert(beatDurations.length === index, "beatDurations.length !== index");
+       beatDurations.push(movieDuration);
+       mediaDurations[index].silenceDuration = movieDuration;
+     }
+     else if (beatDurations.length === index) {
+       // The current beat has no audio, nor no spilled over audio
+       const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
        beatDurations.push(beatDuration);
-       if (beatDuration <= spillover) {
-         return spillover - beatDuration;
-       }
+       mediaDurations[index].silenceDuration = beatDuration;
+     }
+     // else { Skip this beat if the duration has been already added as a group }
+   });
+   assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
+   // We cannot reuse longSilentId. We need to explicitly split it for each beat.
+   const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
+   if (silentIds.length > 0) {
+     const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+     ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
+   }
+   const inputIds = [];
+   context.studio.beats.forEach((studioBeat, index) => {
+     const { silenceDuration } = mediaDurations[index];
+     const paddingId = `[padding_${index}]`;
+     if (studioBeat.audioFile) {
+       const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
+       inputIds.push(audioId);
+     }
+     if (silenceDuration > 0) {
        const silentId = silentIds.pop();
-       ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${beatDuration - spillover}${paddingId}`);
+       ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
        inputIds.push(paddingId);
      }
-     return 0;
-   }, 0);
-   assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
-   // We need to "consume" extra silentIds.
-   silentIds.forEach((silentId, index) => {
-     const extraId = `[silent_extra_${index}]`;
-     ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${0.01}${extraId}`);
-     inputIds.push(extraId);
    });
+   assert(silentIds.length === 0, "silentIds.length !== 0");
    // Finally, combine all audio files.
    ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
    await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
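The heart of this rewrite is the spillover grouping: a beat with audio absorbs the following media-less beats, and `getGroupBeatDurations` splits its audio across the group. A minimal standalone re-implementation of that splitting rule with a worked example (the real function reads durations out of `context.studio.script.beats`):

```typescript
// Minimal sketch of the splitting rule above: beats with an explicit duration keep it, and the
// remaining audio (at least 1s per unspecified beat) is divided evenly among the rest.
const splitDurations = (durations: (number | undefined)[], audioDuration: number): number[] => {
  const specifiedSum = durations.filter((d): d is number => d !== undefined).reduce((a, b) => a + b, 0);
  const unspecified = durations.filter((d) => d === undefined).length;
  const rest = Math.max(audioDuration - specifiedSum, 1.0 * unspecified);
  const each = rest / (unspecified || 1);
  return durations.map((d) => d ?? each);
};
console.log(splitDurations([4, undefined, undefined], 10)); // [4, 3, 3]: 10s of audio spread over 3 beats
```

When the group's total exceeds the audio (explicit durations summing past it), the `reduce` over the group assigns the shortfall to `silenceDuration`; when the audio is longer, the last beat in the group absorbs the remainder.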
package/lib/agents/image_openai_agent.js CHANGED
@@ -1,11 +1,12 @@
  import fs from "fs";
  import path from "path";
  import OpenAI, { toFile } from "openai";
+ import { defaultOpenAIImageModel } from "../utils/const.js";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params }) => {
    const { prompt, images } = namedInputs;
    const { apiKey, moderation, canvasSize } = params;
-   const model = params.model ?? "dall-e-3";
+   const model = params.model ?? defaultOpenAIImageModel;
    const openai = new OpenAI({ apiKey });
    const size = (() => {
      if (model === "gpt-image-1") {
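Both call sites that previously used the literal `"dall-e-3"` (the default here and the concurrency check in `images.js` above) now reference `defaultOpenAIImageModel` from `utils/const.js` (+1 line per the file list). Since both substitutions preserve the old behavior, the constant is presumably:

```typescript
// Presumed content of the new export in lib/utils/const.js; inferred from the substitutions, not verified.
export const defaultOpenAIImageModel = "dall-e-3";
```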
package/lib/agents/validate_schema_agent.d.ts CHANGED
@@ -1,4 +1,4 @@
- import type { AgentFunction, AgentFunctionInfo, DefaultConfigData } from "graphai";
+ import { type AgentFunction, type AgentFunctionInfo, type DefaultConfigData } from "graphai";
  import { MulmoScript } from "../types/index.js";
  import { ZodSchema } from "zod";
  interface ValidateMulmoScriptInputs {
package/lib/agents/validate_schema_agent.js CHANGED
@@ -1,4 +1,4 @@
- import assert from "node:assert";
+ import { assert } from "graphai";
  /**
   * Zod schema validation agent
   * Validates if a JSON string conforms to the Zod schema
@@ -6,8 +6,8 @@ import assert from "node:assert";
  export const validateSchemaAgent = async ({ namedInputs, }) => {
    const { text, schema } = namedInputs;
    try {
-     assert(schema, "schema is required");
-     assert(text, "text is required");
+     assert(!!schema, "schema is required");
+     assert(!!text, "text is required");
      const jsonData = JSON.parse(text);
      const parsed = schema.parse(jsonData);
      return {
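A plausible reading of the added `!!`: graphai's `assert`, unlike `node:assert`, is typed against a strict boolean, so truthy non-boolean values must be coerced. Hypothetical signature:

```typescript
// Assumed signature; graphai's actual typing is not verified here.
declare function assert(condition: boolean, message: string): asserts condition;
```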
package/lib/cli/helpers.js CHANGED
@@ -83,7 +83,12 @@ export const fetchScript = async (isHttpPath, mulmoFilePath, fileOrUrl) => {
  export const getMultiLingual = (multilingualFilePath, beatsLength) => {
    if (fs.existsSync(multilingualFilePath)) {
      const jsonData = readMulmoScriptFile(multilingualFilePath, "ERROR: File does not exist " + multilingualFilePath)?.mulmoData ?? null;
-     return mulmoStudioMultiLingualSchema.parse(jsonData);
+     const dataSet = mulmoStudioMultiLingualSchema.parse(jsonData);
+     while (dataSet.length < beatsLength) {
+       dataSet.push({ multiLingualTexts: {} });
+     }
+     dataSet.length = beatsLength;
+     return dataSet;
    }
    return [...Array(beatsLength)].map(() => ({ multiLingualTexts: {} }));
  };
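The added lines make `getMultiLingual` tolerant of beat-count drift between the script and a previously saved multilingual file: the parsed array is padded with empty entries if the script gained beats, then truncated via the `length` assignment if it lost some. The same behavior as a standalone helper:

```typescript
// Standalone sketch of the pad-then-truncate behavior added above.
const resizeTo = <T>(arr: T[], len: number, make: () => T): T[] => {
  while (arr.length < len) arr.push(make());
  arr.length = len; // drops extra entries when the file has more than the script's beats
  return arr;
};
resizeTo([{ multiLingualTexts: {} }], 3, () => ({ multiLingualTexts: {} })); // => 3 entries
```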