mulmocast 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +5 -0
  2. package/assets/html/caption.html +2 -0
  3. package/lib/actions/audio.d.ts +2 -2
  4. package/lib/actions/audio.js +8 -7
  5. package/lib/actions/captions.js +7 -5
  6. package/lib/actions/images.d.ts +9 -5
  7. package/lib/actions/images.js +73 -36
  8. package/lib/actions/movie.d.ts +2 -2
  9. package/lib/actions/movie.js +24 -9
  10. package/lib/agents/combine_audio_files_agent.js +9 -5
  11. package/lib/agents/image_openai_agent.d.ts +2 -0
  12. package/lib/agents/image_openai_agent.js +3 -2
  13. package/lib/agents/index.d.ts +2 -1
  14. package/lib/agents/index.js +2 -1
  15. package/lib/agents/movie_replicate_agent.d.ts +23 -0
  16. package/lib/agents/movie_replicate_agent.js +93 -0
  17. package/lib/agents/tts_elevenlabs_agent.js +2 -2
  18. package/lib/agents/tts_nijivoice_agent.js +3 -2
  19. package/lib/agents/tts_openai_agent.js +3 -2
  20. package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
  21. package/lib/cli/commands/tool/scripting/builder.js +5 -0
  22. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
  23. package/lib/cli/commands/tool/scripting/handler.js +13 -4
  24. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  25. package/lib/cli/helpers.js +8 -3
  26. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  27. package/lib/methods/mulmo_presentation_style.js +21 -2
  28. package/lib/methods/mulmo_studio_context.js +1 -1
  29. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
  30. package/lib/tools/create_mulmo_script_from_url.js +129 -43
  31. package/lib/types/schema.d.ts +793 -163
  32. package/lib/types/schema.js +32 -1
  33. package/lib/types/type.d.ts +9 -2
  34. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  35. package/lib/utils/ffmpeg_utils.js +2 -2
  36. package/lib/utils/preprocess.d.ts +29 -6
  37. package/lib/utils/prompt.d.ts +2 -1
  38. package/lib/utils/prompt.js +10 -0
  39. package/lib/utils/utils.d.ts +3 -0
  40. package/lib/utils/utils.js +47 -0
  41. package/package.json +3 -2
  42. package/scripts/templates/presentation.json +123 -0
  43. package/scripts/templates/presentation.json~ +119 -0
package/README.md CHANGED
@@ -103,6 +103,11 @@ GOOGLE_PROJECT_ID=your_google_project_id
 
 See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
+#### (Optional) For Movie models
+```bash
+REPLICATE_API_TOKEN=your_replicate_api_key
+```
+
 #### (Optional) For TTS models
 ```bash
 # For Nijivoice TTS
package/assets/html/caption.html CHANGED
@@ -19,6 +19,7 @@
 }
 .caption {
 /* Text positioned at the bottom */
+width: 80%;
 position: absolute;
 bottom: 0px;
 /* Enable text wrapping */
@@ -34,6 +35,7 @@
 padding-right: 10%;
 padding-top: 4px;
 background: rgba(0, 0, 0, 0.4);
+${styles}
 }
 </style>
 </head>
package/lib/actions/audio.d.ts CHANGED
@@ -3,5 +3,5 @@ import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
 export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
 export declare const audioFilePath: (context: MulmoStudioContext) => string;
-export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
-export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
+export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
+export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
package/lib/actions/audio.js CHANGED
@@ -1,6 +1,5 @@
 import "dotenv/config";
-import { GraphAI } from "graphai";
-import { TaskManager } from "graphai/lib/task_manager.js";
+import { GraphAI, TaskManager } from "graphai";
 import * as agents from "@graphai/vanilla";
 import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
 import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -12,7 +11,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
-import { text2hash, localizedText } from "../utils/utils.js";
+import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
 const vanillaAgents = agents.default ?? agents;
@@ -200,7 +199,7 @@ const audioAgents = {
 addBGMAgent,
 combineAudioFilesAgent,
 };
-export const generateBeatAudio = async (index, context, callbacks) => {
+export const generateBeatAudio = async (index, context, settings, callbacks) => {
 try {
 MulmoStudioContextMethods.setSessionState(context, "audio", true);
 const fileName = MulmoStudioContextMethods.getFileName(context);
@@ -209,8 +208,9 @@ export const generateBeatAudio = async (index, context, callbacks) => {
 const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
 mkdir(outDirPath);
 mkdir(audioSegmentDirPath);
+const config = settings2GraphAIConfig(settings);
 const taskManager = new TaskManager(getConcurrency(context));
-const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager, config });
 graph.injectValue("__mapIndex", index);
 graph.injectValue("beat", context.studio.script.beats[index]);
 graph.injectValue("studioBeat", context.studio.beats[index]);
@@ -227,7 +227,7 @@ export const generateBeatAudio = async (index, context, callbacks) => {
 MulmoStudioContextMethods.setSessionState(context, "audio", false);
 }
 };
-export const audio = async (context, callbacks) => {
+export const audio = async (context, settings, callbacks) => {
 try {
 MulmoStudioContextMethods.setSessionState(context, "audio", true);
 const fileName = MulmoStudioContextMethods.getFileName(context);
@@ -239,8 +239,9 @@ export const audio = async (context, callbacks) => {
 const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
 mkdir(outDirPath);
 mkdir(audioSegmentDirPath);
+const config = settings2GraphAIConfig(settings);
 const taskManager = new TaskManager(getConcurrency(context));
-const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
+const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
 graph.injectValue("context", context);
 graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
 graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
package/lib/actions/captions.js CHANGED
@@ -1,3 +1,4 @@
+import { mulmoCaptionParamsSchema } from "../types/index.js";
 import { GraphAI, GraphAILogger } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { getHTMLFile, getCaptionImagePath } from "../utils/file.js";
@@ -23,22 +24,23 @@ const graph_data = {
 const { beat, context, index } = namedInputs;
 try {
 MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
-const caption = MulmoStudioContextMethods.getCaption(context);
+const captionParams = mulmoCaptionParamsSchema.parse({ ...context.studio.script.captionParams, ...beat.captionParams });
 const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
 const imagePath = getCaptionImagePath(context, index);
 const template = getHTMLFile("caption");
 const text = (() => {
 const multiLingual = context.multiLingual;
-if (caption && multiLingual) {
-return multiLingual[index].multiLingualTexts[caption].text;
+if (captionParams.lang && multiLingual) {
+return multiLingual[index].multiLingualTexts[captionParams.lang].text;
 }
-GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
+GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${captionParams.lang}`);
 return beat.text;
 })();
 const htmlData = interpolate(template, {
 caption: text,
 width: `${canvasSize.width}`,
 height: `${canvasSize.height}`,
+styles: captionParams.styles.join(";\n"),
 });
 await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
 context.studio.beats[index].captionFile = imagePath;
@@ -61,7 +63,7 @@ const graph_data = {
 },
 };
 export const captions = async (context, callbacks) => {
-if (context.caption) {
+if (MulmoStudioContextMethods.getCaption(context)) {
 try {
 MulmoStudioContextMethods.setSessionState(context, "caption", true);
 const graph = new GraphAI(graph_data, { ...vanillaAgents });
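
Caption language and extra CSS now come from `captionParams` (script-level values merged with per-beat overrides); the `styles` array is joined with `";\n"` and lands in the `${styles}` slot added to caption.html above. An illustrative fragment: the `lang` and `styles` field names are taken from the code above, but the values are made up:

```typescript
// Hypothetical captionParams, as consumed by
// mulmoCaptionParamsSchema.parse({ ...script.captionParams, ...beat.captionParams }).
const captionParams = {
  lang: "ja", // selects multiLingualTexts["ja"] when translations exist
  styles: ["font-size: 48px", "color: #ffcc00"], // injected into the .caption CSS rule
};
```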
package/lib/actions/images.d.ts CHANGED
@@ -8,8 +8,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
 imageRefs: Record<string, string>;
 }) => Promise<{
 imageParams: {
-model?: string | undefined;
 style?: string | undefined;
+model?: string | undefined;
 moderation?: string | undefined;
 images?: Record<string, {
 type: "image";
@@ -32,16 +32,18 @@ export declare const imagePreprocessAgent: (namedInputs: {
 imagePath: string | undefined;
 referenceImage: string | undefined;
 htmlPrompt?: undefined;
+htmlImageSystemPrompt?: undefined;
 } | {
 imagePath: string;
 htmlPrompt: string;
+htmlImageSystemPrompt: string[];
 } | {
 imagePath: string;
 images: string[];
 imageFromMovie: boolean;
 imageParams: {
-model?: string | undefined;
 style?: string | undefined;
+model?: string | undefined;
 moderation?: string | undefined;
 images?: Record<string, {
 type: "image";
@@ -62,11 +64,12 @@ export declare const imagePreprocessAgent: (namedInputs: {
 };
 movieFile: string | undefined;
 htmlPrompt?: undefined;
+htmlImageSystemPrompt?: undefined;
 } | {
 images: string[];
 imageParams: {
-model?: string | undefined;
 style?: string | undefined;
+model?: string | undefined;
 moderation?: string | undefined;
 images?: Record<string, {
 type: "image";
@@ -90,6 +93,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
 referenceImage: string;
 prompt: string;
 htmlPrompt?: undefined;
+htmlImageSystemPrompt?: undefined;
 }>;
 export declare const imagePluginAgent: (namedInputs: {
 context: MulmoStudioContext;
@@ -97,5 +101,5 @@ export declare const imagePluginAgent: (namedInputs: {
 index: number;
 }) => Promise<void>;
 export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
-export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
-export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
+export declare const generateBeatImage: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -1,21 +1,21 @@
 import dotenv from "dotenv";
 import fs from "fs";
-import { GraphAI, GraphAILogger } from "graphai";
-import { TaskManager } from "graphai/lib/task_manager.js";
+import { GraphAI, GraphAILogger, TaskManager } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
+import { anthropicAgent } from "@graphai/anthropic_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
+import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { findImagePlugin } from "../utils/image_plugins/index.js";
-import { imagePrompt } from "../utils/prompt.js";
+import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
+import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
 import { defaultOpenAIImageModel } from "../utils/const.js";
 import { renderHTMLToImage } from "../utils/markdown.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
-// const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
 import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
 const htmlStyle = (context, beat) => {
@@ -43,7 +43,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
 }
 if (beat.htmlPrompt) {
 const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
-return { imagePath, htmlPrompt };
+return { imagePath, htmlPrompt, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
 }
 // images for "edit_image"
 const images = (() => {
@@ -85,6 +85,7 @@ const beat_graph_data = {
 nodes: {
 context: {},
 imageAgentInfo: {},
+htmlImageAgentInfo: {},
 movieAgentInfo: {},
 imageRefs: {},
 beat: {},
@@ -113,25 +114,21 @@ const beat_graph_data = {
 htmlImageAgent: {
 if: ":preprocessor.htmlPrompt",
 defaultValue: {},
-agent: "openAIAgent",
+agent: ":htmlImageAgentInfo.agent",
+params: {
+mode: ":htmlImageAgentInfo.model",
+},
 inputs: {
 prompt: ":preprocessor.htmlPrompt",
-system: [
-"Based on the provided information, create a single slide HTML page using Tailwind CSS.",
-"If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
-"Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
-"Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
-"If data is provided, use it effectively to populate the slide.",
-],
+system: ":preprocessor.htmlImageSystemPrompt",
 },
 },
 htmlImageGenerator: {
 if: ":preprocessor.htmlPrompt",
 defaultValue: {},
 agent: htmlImageGeneratorAgent,
-// console: { before: true, after: true },
 inputs: {
-html: ":htmlImageAgent.text.codeBlock()",
+html: ":htmlImageAgent.text.codeBlockOrRaw()",
 canvasSize: ":context.presentationStyle.canvasSize",
 file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
 mulmoContext: ":context", // for fileCacheAgentFilter
@@ -213,6 +210,7 @@ const graph_data = {
 nodes: {
 context: {},
 imageAgentInfo: {},
+htmlImageAgentInfo: {},
 movieAgentInfo: {},
 outputStudioFilePath: {},
 imageRefs: {},
@@ -222,6 +220,7 @@ const graph_data = {
 rows: ":context.studio.script.beats",
 context: ":context",
 imageAgentInfo: ":imageAgentInfo",
+htmlImageAgentInfo: ":htmlImageAgentInfo",
 movieAgentInfo: ":movieAgentInfo",
 imageRefs: ":imageRefs",
 },
@@ -268,7 +267,6 @@ const graph_data = {
 },
 },
 writeOutput: {
-// console: { before: true },
 agent: "fileWriteAgent",
 inputs: {
 file: ":outputStudioFilePath",
@@ -291,7 +289,7 @@ const googleAuth = async () => {
 throw error;
 }
 };
-const graphOption = async (context) => {
+const graphOption = async (context, settings) => {
 const agentFilters = [
 {
 name: "fileCacheAgentFilter",
@@ -305,21 +303,22 @@ const graphOption = async (context) => {
 taskManager,
 };
 const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
+const config = settings2GraphAIConfig(settings);
 // We need to get google's auth token only if the google is the text2image provider.
 if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
+userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
 GraphAILogger.log("google was specified as text2image engine");
 const token = await googleAuth();
-options.config = {
-imageGoogleAgent: {
-projectId: process.env.GOOGLE_PROJECT_ID,
-token,
-},
-movieGoogleAgent: {
-projectId: process.env.GOOGLE_PROJECT_ID,
-token,
-},
+config["imageGoogleAgent"] = {
+projectId: process.env.GOOGLE_PROJECT_ID,
+token,
+};
+config["movieGoogleAgent"] = {
+projectId: process.env.GOOGLE_PROJECT_ID,
+token,
 };
 }
+options.config = config;
 return options;
 };
 // TODO: unit test
@@ -370,13 +369,28 @@ const prepareGenerateImages = async (context) => {
 const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
 mkdir(imageProjectDirPath);
 const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
+const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
 const imageRefs = await getImageRefs(context);
+// Determine movie agent based on provider
+const getMovieAgent = () => {
+if (context.dryRun)
+return "mediaMockAgent";
+const provider = context.presentationStyle.movieParams?.provider ?? "google";
+switch (provider) {
+case "replicate":
+return "movieReplicateAgent";
+case "google":
+default:
+return "movieGoogleAgent";
+}
+};
 GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
 const injections = {
 context,
 imageAgentInfo,
+htmlImageAgentInfo,
 movieAgentInfo: {
-agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
+agent: getMovieAgent(),
 },
 outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
 imageRefs,
@@ -384,6 +398,9 @@ const prepareGenerateImages = async (context) => {
 return injections;
 };
 const getConcurrency = (context) => {
+if (context.presentationStyle.movieParams?.provider === "replicate") {
+return 4;
+}
 const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
 if (imageAgentInfo.provider === "openai") {
 // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -393,10 +410,20 @@ const getConcurrency = (context) => {
 }
 return 4;
 };
-const generateImages = async (context, callbacks) => {
-const options = await graphOption(context);
+const generateImages = async (context, settings, callbacks) => {
+const options = await graphOption(context, settings);
 const injections = await prepareGenerateImages(context);
-const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent, openAIAgent }, options);
+const graph = new GraphAI(graph_data, {
+...vanillaAgents,
+imageGoogleAgent,
+movieGoogleAgent,
+movieReplicateAgent,
+imageOpenaiAgent,
+mediaMockAgent,
+fileWriteAgent,
+openAIAgent,
+anthropicAgent,
+}, options);
 Object.keys(injections).forEach((key) => {
 graph.injectValue(key, injections[key]);
 });
@@ -408,10 +435,10 @@ const generateImages = async (context, callbacks) => {
 const res = await graph.run();
 return res.mergeResult;
 };
-export const images = async (context, callbacks) => {
+export const images = async (context, settings, callbacks) => {
 try {
 MulmoStudioContextMethods.setSessionState(context, "image", true);
-const newContext = await generateImages(context, callbacks);
+const newContext = await generateImages(context, settings, callbacks);
 MulmoStudioContextMethods.setSessionState(context, "image", false);
 return newContext;
 }
@@ -420,10 +447,20 @@ export const images = async (context, callbacks) => {
 throw error;
 }
 };
-export const generateBeatImage = async (index, context, callbacks) => {
-const options = await graphOption(context);
+export const generateBeatImage = async (index, context, settings, callbacks) => {
+const options = await graphOption(context, settings);
 const injections = await prepareGenerateImages(context);
-const graph = new GraphAI(beat_graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent, openAIAgent }, options);
+const graph = new GraphAI(beat_graph_data, {
+...vanillaAgents,
+imageGoogleAgent,
+movieGoogleAgent,
+movieReplicateAgent,
+imageOpenaiAgent,
+mediaMockAgent,
+fileWriteAgent,
+openAIAgent,
+anthropicAgent,
+}, options);
 Object.keys(injections).forEach((key) => {
 if ("outputStudioFilePath" !== key) {
 graph.injectValue(key, injections[key]);
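
Net effect of the images.js changes: the movie agent is chosen from `movieParams.provider` (`"replicate"` selects the new `movieReplicateAgent`; `"google"` remains the default), and the HTML-slide LLM call is routed through `htmlImageAgentInfo` (which can now point at `anthropicAgent`) instead of a hard-coded `openAIAgent`. An illustrative fragment driving `getMovieAgent()`'s switch; the `provider` value is confirmed by the code above, while the model id is a hypothetical Replicate `owner/name` string matching the template type in the new agent's .d.ts below:

```typescript
// Hypothetical presentation-style fragment selecting the Replicate movie backend.
const movieParams = {
  provider: "replicate", // getMovieAgent() maps this to "movieReplicateAgent"
  model: "bytedance/seedance-1-lite", // assumed "owner/name" model id on Replicate
};
```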
package/lib/actions/movie.d.ts CHANGED
@@ -1,5 +1,5 @@
-import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
-export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
+import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
+export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption) => {
 videoId: string;
 videoPart: string;
 };
package/lib/actions/movie.js CHANGED
@@ -1,12 +1,12 @@
 import { GraphAILogger, assert } from "graphai";
-import { mulmoTransitionSchema } from "../types/index.js";
+import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
-export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
+export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption) => {
 const videoId = `v${inputIndex}`;
 const videoFilters = [];
 // Handle different media types
@@ -19,9 +19,19 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
 videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
 }
 // Common filters for all media types
-videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
-// In case of the aspect ratio mismatch, we fill the extra space with black color.
-`pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
+videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS");
+// Apply scaling based on fill option
+if (fillOption.style === "aspectFill") {
+// For aspect fill: scale to fill the canvas completely, cropping if necessary
+videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`, `crop=${canvasInfo.width}:${canvasInfo.height}`);
+}
+else {
+// For aspect fit: scale to fit within canvas, padding if necessary
+videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+// In case of the aspect ratio mismatch, we fill the extra space with black color.
+`pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
+}
+videoFilters.push("setsar=1", "format=yuv420p");
 return {
 videoId,
 videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
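
For a 1280x720 canvas, the two branches above emit filter chains like these (derived directly from the code; the shared `trim`/`fps`/`setpts` prefix and `setsar`/`format` suffix are omitted):

```typescript
// aspectFill: upscale until the canvas is fully covered, then crop the overflow.
const aspectFill = "scale=w=1280:h=720:force_original_aspect_ratio=increase,crop=1280:720";
// default (aspect fit): downscale to fit inside the canvas, then pad the leftover area with black.
const aspectFit =
  "scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black";
```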
@@ -59,7 +69,8 @@ const getOutputOption = (audioId, videoId) => {
 "-b:a 128k", // Audio bitrate
 ];
 };
-const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
+const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
+const caption = MulmoStudioContextMethods.getCaption(context);
 const start = performance.now();
 const ffmpegContext = FfmpegContextInit();
 const missingIndex = context.studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
@@ -95,7 +106,12 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
 return 0;
 })();
 const duration = studioBeat.duration + extraPadding;
-const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
+// Get fillOption from merged imageParams (global + beat-specific)
+const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+const beatFillOption = beat.movieParams?.fillOption;
+const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption);
 ffmpegContext.filterComplex.push(videoPart);
 if (caption && studioBeat.captionFile) {
 const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
@@ -193,12 +209,11 @@ export const movieFilePath = (context) => {
 export const movie = async (context) => {
 MulmoStudioContextMethods.setSessionState(context, "video", true);
 try {
-const caption = MulmoStudioContextMethods.getCaption(context);
 const fileName = MulmoStudioContextMethods.getFileName(context);
 const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
 const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
 const outputVideoPath = movieFilePath(context);
-if (await createVideo(audioArtifactFilePath, outputVideoPath, context, caption)) {
+if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
 writingMessage(outputVideoPath);
 }
 }
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -1,4 +1,4 @@
-import { assert } from "graphai";
+import { assert, GraphAILogger } from "graphai";
 import { silent60secPath } from "../utils/file.js";
 import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 const getMovieDulation = async (beat) => {
@@ -77,7 +77,8 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
 // Yes, the current beat has spilled over audio.
 const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-if (beatsTotalDuration > audioDuration) {
+if (beatsTotalDuration > audioDuration + 0.01) {
+// 0.01 is a tolerance to avoid floating point precision issues
 group.reduce((remaining, idx, iGroup) => {
 if (remaining >= groupBeatsDurations[iGroup]) {
 return remaining - groupBeatsDurations[iGroup];
@@ -88,7 +89,9 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 }
 else {
 // Last beat gets the rest of the audio.
-groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+if (audioDuration > beatsTotalDuration) {
+groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+}
 }
 beatDurations.push(...groupBeatsDurations);
 }
@@ -98,7 +101,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 // padding is the amount of audio padding specified in the script.
 const padding = getPadding(context, beat, index);
 // totalPadding is the amount of audio padding to be added to the audio file.
-const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
 const beatDuration = audioDuration + totalPadding;
 beatDurations.push(beatDuration);
 if (totalPadding > 0) {
@@ -124,7 +127,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 // We cannot reuse longSilentId. We need to explicitly split it for each beat.
 const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
 if (silentIds.length > 0) {
-const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
 ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
 }
 const inputIds = [];
@@ -142,6 +145,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 }
 });
 assert(silentIds.length === 0, "silentIds.length !== 0");
+GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
 // Finally, combine all audio files.
 ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
 await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
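
The new `+ 0.01` tolerance and the `Math.round(... * 100) / 100` rounding both guard against binary floating-point drift when summing beat durations:

```typescript
// Why the tolerance matters: summed float durations rarely compare exactly.
const total = [0.1, 0.2, 0.3].reduce((a, b) => a + b, 0); // 0.6000000000000001
console.log(total > 0.6);        // true  -- would falsely enter the spill-over branch
console.log(total > 0.6 + 0.01); // false -- the tolerance absorbs the drift
```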
package/lib/agents/image_openai_agent.d.ts CHANGED
@@ -13,6 +13,8 @@ export declare const imageOpenaiAgent: AgentFunction<{
 }, {
 prompt: string;
 images: string[] | null | undefined;
+}, {
+apiKey?: string;
 }>;
 declare const imageOpenaiAgentInfo: AgentFunctionInfo;
 export default imageOpenaiAgentInfo;
package/lib/agents/image_openai_agent.js CHANGED
@@ -4,9 +4,10 @@ import { GraphAILogger } from "graphai";
 import OpenAI, { toFile } from "openai";
 import { defaultOpenAIImageModel } from "../utils/const.js";
 // https://platform.openai.com/docs/guides/image-generation
-export const imageOpenaiAgent = async ({ namedInputs, params }) => {
+export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
 const { prompt, images } = namedInputs;
-const { apiKey, moderation, canvasSize } = params;
+const { moderation, canvasSize } = params;
+const { apiKey } = { ...config };
 const model = params.model ?? defaultOpenAIImageModel;
 const openai = new OpenAI({ apiKey });
 const size = (() => {
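
With `apiKey` moved from `params` to `config`, secrets flow through the GraphAI constructor's per-agent `config` option (the same mechanism `graphOption` in images.js now uses for the Google agents) rather than being baked into the graph data. A minimal sketch of the wiring; the graph content here is a placeholder:

```typescript
import { GraphAI } from "graphai";
import * as agents from "@graphai/vanilla";

const vanillaAgents = agents.default ?? agents;
// Per-agent config: GraphAI hands config.imageOpenaiAgent to imageOpenaiAgent at run time.
const config = { imageOpenaiAgent: { apiKey: process.env.OPENAI_API_KEY } };
const graph = new GraphAI({ version: 0.5, nodes: { echo: { value: "ok" } } }, vanillaAgents, { config });
```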
package/lib/agents/index.d.ts CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -13,4 +14,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -14,4 +15,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/movie_replicate_agent.d.ts ADDED
@@ -0,0 +1,23 @@
+import type { AgentFunction, AgentFunctionInfo } from "graphai";
+export declare const getAspectRatio: (canvasSize: {
+width: number;
+height: number;
+}) => string;
+export type MovieReplicateConfig = {
+apiKey?: string;
+};
+export declare const movieReplicateAgent: AgentFunction<{
+model: `${string}/${string}` | undefined;
+canvasSize: {
+width: number;
+height: number;
+};
+duration?: number;
+}, {
+buffer: Buffer;
+}, {
+prompt: string;
+imagePath?: string;
+}, MovieReplicateConfig>;
+declare const movieReplicateAgentInfo: AgentFunctionInfo;
+export default movieReplicateAgentInfo;
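
Going by this declaration, the new Replicate-backed movie agent takes a text `prompt` (plus an optional `imagePath` for image-to-video) as named inputs, an `owner/name` model id with canvas size and duration as params, the API key via config, and resolves to a raw video `buffer`. A hedged invocation sketch; in normal use GraphAI supplies these arguments itself, and the model id and prompt here are illustrative:

```typescript
import { movieReplicateAgent } from "./movie_replicate_agent.js";

const { buffer } = await movieReplicateAgent({
  namedInputs: { prompt: "A drone shot over a foggy coastline at sunrise" },
  params: {
    model: "bytedance/seedance-1-lite", // assumed "owner/name" Replicate model id
    canvasSize: { width: 1280, height: 720 }, // getAspectRatio presumably maps this to "16:9"
    duration: 5,
  },
  config: { apiKey: process.env.REPLICATE_API_TOKEN },
  // direct calls omit the rest of GraphAI's AgentFunctionContext
});
// `buffer` should hold the generated video bytes, ready to be cached to the beat's movie path.
```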