mulmocast 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/assets/templates/ani.json +48 -0
  2. package/assets/templates/ani_ja.json +45 -0
  3. package/lib/actions/audio.js +2 -0
  4. package/lib/actions/image_agents.d.ts +28 -22
  5. package/lib/actions/image_agents.js +4 -4
  6. package/lib/actions/images.js +12 -21
  7. package/lib/actions/translate.d.ts +4 -1
  8. package/lib/actions/translate.js +6 -3
  9. package/lib/agents/combine_audio_files_agent.js +106 -86
  10. package/lib/agents/movie_replicate_agent.js +4 -3
  11. package/lib/agents/tts_elevenlabs_agent.js +1 -1
  12. package/lib/agents/tts_nijivoice_agent.js +2 -3
  13. package/lib/cli/commands/tool/scripting/builder.js +1 -1
  14. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -1
  15. package/lib/cli/commands/tool/story_to_script/builder.js +1 -1
  16. package/lib/cli/commands/tool/story_to_script/handler.d.ts +1 -1
  17. package/lib/index.browser.d.ts +1 -3
  18. package/lib/index.browser.js +2 -4
  19. package/lib/index.common.d.ts +2 -0
  20. package/lib/index.common.js +3 -0
  21. package/lib/index.node.d.ts +7 -0
  22. package/lib/index.node.js +8 -0
  23. package/lib/methods/mulmo_presentation_style.d.ts +15 -1
  24. package/lib/methods/mulmo_presentation_style.js +10 -11
  25. package/lib/tools/story_to_script.d.ts +1 -1
  26. package/lib/types/schema.d.ts +343 -322
  27. package/lib/types/schema.js +21 -14
  28. package/lib/types/type.d.ts +3 -2
  29. package/lib/utils/context.d.ts +73 -72
  30. package/lib/utils/ffmpeg_utils.js +6 -0
  31. package/lib/utils/image_plugins/image.d.ts +2 -2
  32. package/lib/utils/image_plugins/movie.d.ts +2 -2
  33. package/lib/utils/preprocess.d.ts +37 -36
  34. package/lib/utils/provider2agent.d.ts +9 -7
  35. package/lib/utils/provider2agent.js +12 -7
  36. package/lib/utils/utils.d.ts +1 -2
  37. package/lib/utils/utils.js +7 -2
  38. package/package.json +11 -11
  39. package/scripts/templates/presentation.json~ +0 -119

package/assets/templates/ani.json
@@ -0,0 +1,48 @@
+ {
+   "title": "Presentation with Ani in Japanese",
+   "description": "Template for presentation with Ani in Japanese.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "movieParams": {
+       "provider": "replicate",
+       "model": "bytedance/seedance-1-lite"
+     },
+     "speechParams": {
+       "provider": "openai",
+       "speakers": {
+         "Presenter": {
+           "voiceId": "shimmer",
+           "speechOptions": { "instruction": "Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl." }
+         }
+       }
+     },
+     "audioParams": {
+       "bgm": {
+         "kind": "url",
+         "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
+       }
+     },
+     "lang": "en",
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "imageParams": {
+       "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
+       "images": {
+         "ani": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }

package/assets/templates/ani_ja.json
@@ -0,0 +1,45 @@
+ {
+   "title": "Presentation with Ani",
+   "description": "Template for presentation with Ani.",
+   "systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "movieParams": {
+       "provider": "replicate",
+       "model": "bytedance/seedance-1-lite"
+     },
+     "audioParams": {
+       "bgm": {
+         "kind": "url",
+         "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
+       }
+     },
+     "lang": "ja",
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
+       }
+     },
+     "imageParams": {
+       "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
+       "images": {
+         "ani": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }

package/lib/actions/audio.js
@@ -143,6 +143,7 @@ const graph_data = {
    },
    addBGM: {
      agent: "addBGMAgent",
+     unless: ":context.presentationStyle.audioParams.bgmVolume.equal(0)",
      inputs: {
        wait: ":combineFiles",
        voiceFile: ":audioCombinedFilePath",
@@ -153,6 +154,7 @@
        },
      },
      isResult: true,
+     defaultValue: {},
    },
    title: {
      agent: "copyAgent",

package/lib/actions/image_agents.d.ts
@@ -44,17 +44,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
  } | {
    imagePath: string;
    imageFromMovie: boolean;
-   movieParams: {
-     speed?: number | undefined;
-     model?: string | undefined;
-     fillOption?: {
-       style: "aspectFit" | "aspectFill";
-     } | undefined;
-     provider?: string | undefined;
-     transition?: {
-       type: "fade" | "slideout_left";
-       duration: number;
-     } | undefined;
+   movieAgentInfo: {
+     agent: string;
+     movieParams: {
+       speed?: number | undefined;
+       provider?: string | undefined;
+       model?: string | undefined;
+       fillOption?: {
+         style: "aspectFit" | "aspectFill";
+       } | undefined;
+       transition?: {
+         type: "fade" | "slideout_left";
+         duration: number;
+       } | undefined;
+     };
    };
    imageParams: {
      provider: string;
@@ -91,17 +94,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
    imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
    prompt: string;
    referenceImages: string[];
-   movieParams: {
-     speed?: number | undefined;
-     model?: string | undefined;
-     fillOption?: {
-       style: "aspectFit" | "aspectFill";
-     } | undefined;
-     provider?: string | undefined;
-     transition?: {
-       type: "fade" | "slideout_left";
-       duration: number;
-     } | undefined;
+   movieAgentInfo: {
+     agent: string;
+     movieParams: {
+       speed?: number | undefined;
+       provider?: string | undefined;
+       model?: string | undefined;
+       fillOption?: {
+         style: "aspectFit" | "aspectFill";
+       } | undefined;
+       transition?: {
+         type: "fade" | "slideout_left";
+         duration: number;
+       } | undefined;
+     };
    };
    imageParams: {
      provider: string;

package/lib/actions/image_agents.js
@@ -28,15 +28,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
      // undefined prompt indicates that image generation is not needed
      return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
    }
-   const movieParams = { ...context.presentationStyle.movieParams, ...beat.movieParams };
-   GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
+   const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
+   GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
    if (beat.moviePrompt && !beat.imagePrompt) {
-     return { ...returnValue, imagePath, imageFromMovie: true, movieParams }; // no image prompt, only movie prompt
+     return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
    }
    // referenceImages for "edit_image", openai agent.
    const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
    const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
-   return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieParams };
+   return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieAgentInfo };
  };
  export const imagePluginAgent = async (namedInputs) => {
    const { context, beat, index } = namedInputs;

package/lib/actions/images.js
@@ -39,7 +39,6 @@ const beat_graph_data = {
  nodes: {
    context: {},
    htmlImageAgentInfo: {},
-   movieAgentInfo: {},
    imageRefs: {},
    beat: {},
    __mapIndex: {},
@@ -134,7 +133,7 @@
    },
    movieGenerator: {
      if: ":preprocessor.movieFile",
-     agent: ":movieAgentInfo.agent",
+     agent: ":preprocessor.movieAgentInfo.agent",
      inputs: {
        onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
        prompt: ":beat.moviePrompt",
@@ -147,7 +146,7 @@
        mulmoContext: ":context",
      },
      params: {
-       model: ":preprocessor.movieParams.model",
+       model: ":preprocessor.movieAgentInfo.movieParams.model",
        duration: ":beat.duration",
        canvasSize: ":context.presentationStyle.canvasSize",
      },
@@ -167,16 +166,19 @@
      defaultValue: {},
    },
    audioChecker: {
-     if: ":preprocessor.movieFile",
      agent: async (namedInputs) => {
-       const { hasAudio } = await ffmpegGetMediaDuration(namedInputs.movieFile);
+       const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
+       if (!sourceFile) {
+         return { hasMovieAudio: false };
+       }
+       const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
        return { hasMovieAudio: hasAudio };
      },
      inputs: {
-       onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
+       onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
        movieFile: ":preprocessor.movieFile",
+       imageFile: ":preprocessor.imagePath",
      },
-     defaultValue: {},
    },
    output: {
      agent: "copyAgent",
@@ -201,7 +203,6 @@ const graph_data = {
  nodes: {
    context: {},
    htmlImageAgentInfo: {},
-   movieAgentInfo: {},
    outputStudioFilePath: {},
    imageRefs: {},
    map: {
@@ -210,7 +211,6 @@ const graph_data = {
      rows: ":context.studio.script.beats",
      context: ":context",
      htmlImageAgentInfo: ":htmlImageAgentInfo",
-     movieAgentInfo: ":movieAgentInfo",
      imageRefs: ":imageRefs",
    },
    isResult: true,
@@ -293,17 +293,11 @@ export const graphOption = async (context, settings) => {
    const config = settings2GraphAIConfig(settings, process.env);
    // We need to get google's auth token only if the google is the text2image provider.
    if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
-     userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
+     userAssert(!!config.movieGoogleAgent || !!config.imageGoogleAgent, "GOOGLE_PROJECT_ID is not set");
      GraphAILogger.log("google was specified as text2image engine");
      const token = await googleAuth();
-     config["imageGoogleAgent"] = {
-       projectId: process.env.GOOGLE_PROJECT_ID,
-       token,
-     };
-     config["movieGoogleAgent"] = {
-       projectId: process.env.GOOGLE_PROJECT_ID,
-       token,
-     };
+     config["imageGoogleAgent"].token = token;
+     config["movieGoogleAgent"].token = token;
    }
    options.config = config;
    return options;
@@ -320,9 +314,6 @@ const prepareGenerateImages = async (context) => {
    const injections = {
      context,
      htmlImageAgentInfo,
-     movieAgentInfo: {
-       agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
-     },
      outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
      imageRefs,
    };

package/lib/actions/translate.d.ts
@@ -1,4 +1,7 @@
  import "dotenv/config";
  import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const translate: (context: MulmoStudioContext, args?: {
+   callbacks?: CallbackFunction[];
+   settings?: Record<string, string>;
+ }) => Promise<void>;
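
The second parameter of `translate` changes from a bare callback array to an options object. A sketch of the new call shape, derived from the declaration above (the import path is an assumption; see the index.node.js entries in the file list):

```typescript
import type { CallbackFunction } from "graphai";
import { translate } from "mulmocast"; // assumed re-export from the package root

declare const context: never; // a MulmoStudioContext (placeholder)
declare const callbacks: CallbackFunction[];

// 0.1.5: translate(context, callbacks)
// 0.1.7: one optional object; both fields optional.
await translate(context, {
  callbacks,
  settings: { OPENAI_API_KEY: "sk-..." }, // forwarded to settings2GraphAIConfig
});
```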

package/lib/actions/translate.js
@@ -4,6 +4,7 @@ import * as agents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
+ import { settings2GraphAIConfig } from "../utils/utils.js";
  import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -208,15 +209,17 @@ const agentFilters = [
  ];
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
- export const translate = async (context, callbacks) => {
+ export const translate = async (context, args) => {
+   const { settings, callbacks } = args ?? {};
    try {
      MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
      const fileName = MulmoStudioContextMethods.getFileName(context);
      const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
      const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
      mkdir(outDirPath);
-     assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
-     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
+     const config = settings2GraphAIConfig(settings, process.env);
+     assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
+     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
      graph.injectValue("context", context);
      graph.injectValue("defaultLang", defaultLang);
      graph.injectValue("targetLangs", targetLangs);

package/lib/agents/combine_audio_files_agent.js
@@ -1,6 +1,6 @@
  import { assert, GraphAILogger } from "graphai";
  import { silent60secPath } from "../utils/file.js";
- import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
+ import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration, } from "../utils/ffmpeg_utils.js";
  import { userAssert } from "../utils/utils.js";
  const getMovieDuration = async (beat) => {
    if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
@@ -62,6 +62,93 @@ const getGroupBeatDurations = (context, group, audioDuration) => {
    });
    return durations;
  };
+ const getInputIds = (context, mediaDurations, ffmpegContext, silentIds) => {
+   const inputIds = [];
+   context.studio.beats.forEach((studioBeat, index) => {
+     const { silenceDuration } = mediaDurations[index];
+     const paddingId = `[padding_${index}]`;
+     if (studioBeat.audioFile) {
+       const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
+       inputIds.push(audioId);
+     }
+     if (silenceDuration > 0) {
+       const silentId = silentIds.pop();
+       ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
+       inputIds.push(paddingId);
+     }
+   });
+   return inputIds;
+ };
+ const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations, groupLength) => {
+   return (remaining, idx, iGroup) => {
+     const subBeatDurations = mediaDurations[idx];
+     userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
+     if (iGroup === groupLength - 1) {
+       beatDurations.push(remaining);
+       subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
+       return 0;
+     }
+     const nextBeat = context.studio.script.beats[idx + 1];
+     assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
+     const voiceStartAt = nextBeat.image?.startAt;
+     if (voiceStartAt) {
+       const remainingDuration = movieDuration - voiceStartAt;
+       const duration = remaining - remainingDuration;
+       userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
+       beatDurations.push(duration);
+       subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
+       userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
+       return remainingDuration;
+     }
+     beatDurations.push(subBeatDurations.audioDuration);
+     return remaining - subBeatDurations.audioDuration;
+   };
+ };
+ const getVoiceOverGroup = (context, index) => {
+   const group = [index];
+   for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
+     group.push(i);
+   }
+   return group;
+ };
+ const getSpillOverGroup = (context, mediaDurations, index) => {
+   const group = [index];
+   for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
+     group.push(i);
+   }
+   return group;
+ };
+ const spilledOverAudio = (context, group, audioDuration, beatDurations, mediaDurations) => {
+   const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
+   // Yes, the current beat has spilled over audio.
+   const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
+   if (beatsTotalDuration > audioDuration + 0.01) {
+     // 0.01 is a tolerance to avoid floating point precision issues
+     group.reduce((remaining, idx, iGroup) => {
+       if (remaining >= groupBeatsDurations[iGroup]) {
+         return remaining - groupBeatsDurations[iGroup];
+       }
+       mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
+       return 0;
+     }, audioDuration);
+   }
+   else if (audioDuration > beatsTotalDuration) {
+     // Last beat gets the rest of the audio.
+     groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+   }
+   beatDurations.push(...groupBeatsDurations);
+ };
+ const noSpilledOverAudio = (context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations) => {
+   // padding is the amount of audio padding specified in the script.
+   const padding = getPadding(context, beat, index);
+   // totalPadding is the amount of audio padding to be added to the audio file.
+   const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
+   const beatDuration = audioDuration + totalPadding;
+   beatDurations.push(beatDuration);
+   if (totalPadding > 0) {
+     mediaDurations[index].silenceDuration = totalPadding;
+   }
+ };
  const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const { context, combinedFileName } = namedInputs;
    const ffmpegContext = FfmpegContextInit();
@@ -77,91 +164,37 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const { audioDuration, movieDuration } = mediaDurations[index];
    // Check if we are processing a voice-over beat.
    if (movieDuration > 0) {
-     const group = [index];
-     for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
-       group.push(i);
-     }
+     const group = getVoiceOverGroup(context, index);
      if (group.length > 1) {
-       group.reduce((remaining, idx, iGroup) => {
-         const subBeatDurations = mediaDurations[idx];
-         userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
-         if (iGroup === group.length - 1) {
-           beatDurations.push(remaining);
-           subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
-           return 0;
-         }
-         const nextBeat = context.studio.script.beats[idx + 1];
-         assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
-         const voiceStartAt = nextBeat.image?.startAt;
-         if (voiceStartAt) {
-           const remainingDuration = movieDuration - voiceStartAt;
-           const duration = remaining - remainingDuration;
-           userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
-           beatDurations.push(duration);
-           subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
-           userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
-           return remainingDuration;
-         }
-         beatDurations.push(subBeatDurations.audioDuration);
-         return remaining - subBeatDurations.audioDuration;
-       }, movieDuration);
+       GraphAILogger.log(`Voice over group: ${group.length}`);
+       group.reduce(voiceOverProcess(context, mediaDurations, movieDuration, beatDurations, group.length), movieDuration);
        return;
      }
    }
    // Check if the current beat has media and the next beat does not have media.
    if (audioDuration > 0) {
      // Check if the current beat has spilled over audio.
-     const group = [index];
-     for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
-       group.push(i);
-     }
+     const group = getSpillOverGroup(context, mediaDurations, index);
      if (group.length > 1) {
-       const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
-       // Yes, the current beat has spilled over audio.
-       const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-       if (beatsTotalDuration > audioDuration + 0.01) {
-         // 0.01 is a tolerance to avoid floating point precision issues
-         group.reduce((remaining, idx, iGroup) => {
-           if (remaining >= groupBeatsDurations[iGroup]) {
-             return remaining - groupBeatsDurations[iGroup];
-           }
-           mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
-           return 0;
-         }, audioDuration);
-       }
-       else {
-         // Last beat gets the rest of the audio.
-         if (audioDuration > beatsTotalDuration) {
-           groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
-         }
-       }
-       beatDurations.push(...groupBeatsDurations);
-     }
-     else {
-       // No spilled over audio.
-       assert(beatDurations.length === index, "beatDurations.length !== index");
-       // padding is the amount of audio padding specified in the script.
-       const padding = getPadding(context, beat, index);
-       // totalPadding is the amount of audio padding to be added to the audio file.
-       const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
-       const beatDuration = audioDuration + totalPadding;
-       beatDurations.push(beatDuration);
-       if (totalPadding > 0) {
-         mediaDurations[index].silenceDuration = totalPadding;
-       }
+       GraphAILogger.log(`Spill over group: ${group.length}`);
+       spilledOverAudio(context, group, audioDuration, beatDurations, mediaDurations);
+       return;
      }
+     // No spilled over audio.
+     assert(beatDurations.length === index, "beatDurations.length !== index");
+     noSpilledOverAudio(context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations);
+     return;
    }
-   else if (movieDuration > 0) {
+   if (movieDuration > 0) {
      // This beat has only a movie, not audio.
      beatDurations.push(movieDuration);
      mediaDurations[index].silenceDuration = movieDuration;
+     return;
    }
-   else {
-     // The current beat has no audio, nor no spilled over audio
-     const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
-     beatDurations.push(beatDuration);
-     mediaDurations[index].silenceDuration = beatDuration;
-   }
+   // The current beat has no audio, nor no spilled over audio
+   const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
+   beatDurations.push(beatDuration);
+   mediaDurations[index].silenceDuration = beatDuration;
  });
  assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
  // We cannot reuse longSilentId. We need to explicitly split it for each beat.
@@ -170,20 +203,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
    ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
  }
- const inputIds = [];
- context.studio.beats.forEach((studioBeat, index) => {
-   const { silenceDuration } = mediaDurations[index];
-   const paddingId = `[padding_${index}]`;
-   if (studioBeat.audioFile) {
-     const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
-     inputIds.push(audioId);
-   }
-   if (silenceDuration > 0) {
-     const silentId = silentIds.pop();
-     ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
-     inputIds.push(paddingId);
-   }
- });
+ const inputIds = getInputIds(context, mediaDurations, ffmpegContext, silentIds);
  assert(silentIds.length === 0, "silentIds.length !== 0");
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
  // Finally, combine all audio files.
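
The extracted `voiceOverProcess` reducer keeps the arithmetic of the inline code it replaces. A worked trace with hypothetical numbers: a 10-second movie carrying two voice-over beats, where the second beat's `startAt` is 6 seconds:

```typescript
// Hypothetical trace of voiceOverProcess over a two-beat group.
const movieDuration = 10;      // seconds of movie on the first beat
const audioDurations = [3, 4]; // narration length of each beat
const startAt = 6;             // nextBeat.image.startAt, seen while on beat 0

// Beat 0 (not last): remainingDuration = 10 - 6 = 4,
// so beat 0 occupies duration = 10 - 4 = 6 seconds on screen.
const beat0Duration = movieDuration - (movieDuration - startAt); // 6
const beat0Silence = beat0Duration - audioDurations[0];          // 3 s of padding
// Beat 1 (last in group) receives whatever remains.
const beat1Duration = movieDuration - startAt;                   // 4
const beat1Silence = beat1Duration - audioDurations[1];          // 0
```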

package/lib/agents/movie_replicate_agent.js
@@ -1,6 +1,7 @@
  import { readFileSync } from "fs";
  import { GraphAILogger } from "graphai";
  import Replicate from "replicate";
+ import { provider2MovieAgent } from "../utils/provider2agent.js";
  async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
    const replicate = new Replicate({
      auth: apiKey,
@@ -21,7 +22,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
    if (imagePath) {
      const buffer = readFileSync(imagePath);
      const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
-     if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
+     if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro" || model === "minimax/hailuo-02") {
        input.start_image = base64Image;
      }
      else {
@@ -29,7 +30,7 @@
      }
    }
    try {
-     const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
+     const output = await replicate.run(model ?? provider2MovieAgent.replicate.defaultModel, { input });
      // Download the generated video
      if (output && typeof output === "object" && "url" in output) {
        const videoUrl = output.url();
@@ -62,7 +63,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
    const { prompt, imagePath } = namedInputs;
    const aspectRatio = getAspectRatio(params.canvasSize);
    const duration = params.duration ?? 5;
-   const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
+   const apiKey = config?.apiKey;
    if (!apiKey) {
      throw new Error("REPLICATE_API_TOKEN environment variable is required");
    }
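
The hard-coded Replicate fallback model now comes from the shared provider table. A sketch of the lookup; provider2agent.js appears in the file list but its body is not shown here, so the concrete value is an assumption based on the literal removed above and on the new templates:

```typescript
import { provider2MovieAgent } from "../utils/provider2agent.js";

// Assumption: defaultModel still resolves to "bytedance/seedance-1-lite",
// the literal previously inlined in generateMovie.
const resolveModel = (model?: string) => model ?? provider2MovieAgent.replicate.defaultModel;
```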

package/lib/agents/tts_elevenlabs_agent.js
@@ -3,7 +3,7 @@ import { provider2TTSAgent } from "../utils/provider2agent.js";
  export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
    const { text } = namedInputs;
    const { voice, model, stability, similarityBoost, suppressError } = params;
-   const apiKey = config?.apiKey ?? process.env.ELEVENLABS_API_KEY;
+   const apiKey = config?.apiKey;
    if (!apiKey) {
      throw new Error("ELEVENLABS_API_KEY environment variable is required");
    }

package/lib/agents/tts_nijivoice_agent.js
@@ -1,5 +1,4 @@
  import { GraphAILogger, assert } from "graphai";
- const nijovoiceApiKey = process.env.NIJIVOICE_API_KEY ?? "";
  const errorMessage = [
    "TTS NijiVoice: No API key. ",
    "You have the following options:",
@@ -10,12 +9,12 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
    const { suppressError, voice, speed, speed_global } = params;
    const { apiKey } = config ?? {};
    const { text } = namedInputs;
-   assert(!!(apiKey ?? nijovoiceApiKey), errorMessage);
+   assert(!!apiKey, errorMessage);
    const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
    const options = {
      method: "POST",
      headers: {
-       "x-api-key": apiKey ?? nijovoiceApiKey,
+       "x-api-key": apiKey,
        accept: "application/json",
        "content-type": "application/json",
      },
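
Across the Replicate, ElevenLabs, and NijiVoice hunks the pattern is the same: agents no longer fall back to process.env themselves, so every API key must arrive through the GraphAI config. A sketch of the intended flow, assuming settings2GraphAIConfig maps env-style keys onto per-agent config slots (the translate.js hunk shows it producing config.openAIAgent.apiKey; the slot names for the TTS agents are assumptions):

```typescript
import { GraphAI } from "graphai";
import { settings2GraphAIConfig } from "../utils/utils.js";

declare const graphData: never; // your graph definition (placeholder)
declare const agents: never;    // your agent map (placeholder)

// Env-style settings in, per-agent config out; process.env is the fallback source.
const config = settings2GraphAIConfig({ NIJIVOICE_API_KEY: "nv-..." }, process.env);
// Each agent then reads its own config.apiKey slot; e.g. the NijiVoice agent now
// asserts !!apiKey instead of consulting process.env.NIJIVOICE_API_KEY at import time.
const graph = new GraphAI(graphData, agents, { config });
```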

package/lib/cli/commands/tool/scripting/builder.js
@@ -1,4 +1,4 @@
- import { llm } from "../../../../utils/utils.js";
+ import { llm } from "../../../../utils/provider2agent.js";
  import { getAvailableTemplates } from "../../../../utils/file.js";
  const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
  export const builder = (yargs) => {

package/lib/cli/commands/tool/scripting/handler.d.ts
@@ -1,5 +1,5 @@
  import { ToolCliArgs } from "../../../../types/cli_types.js";
- import { LLM } from "../../../../utils/utils.js";
+ import type { LLM } from "../../../../utils/provider2agent.js";
  export declare const handler: (argv: ToolCliArgs<{
    o?: string;
    b?: string;