mulmocast 0.0.15 → 0.0.16
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- package/assets/templates/text_and_image.json +6 -0
- package/assets/templates/text_only.json +6 -0
- package/lib/actions/audio.d.ts +3 -1
- package/lib/actions/audio.js +82 -44
- package/lib/actions/captions.js +1 -1
- package/lib/actions/images.d.ts +4 -0
- package/lib/actions/images.js +40 -21
- package/lib/actions/movie.js +19 -19
- package/lib/actions/pdf.js +2 -2
- package/lib/actions/translate.js +1 -1
- package/lib/agents/add_bgm_agent.js +3 -3
- package/lib/agents/combine_audio_files_agent.js +1 -1
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/tavily_agent.d.ts +15 -0
- package/lib/agents/tavily_agent.js +130 -0
- package/lib/cli/commands/audio/builder.d.ts +2 -0
- package/lib/cli/commands/image/builder.d.ts +2 -0
- package/lib/cli/commands/movie/builder.d.ts +2 -0
- package/lib/cli/commands/pdf/builder.d.ts +2 -0
- package/lib/cli/commands/translate/builder.d.ts +2 -0
- package/lib/cli/common.d.ts +2 -0
- package/lib/cli/common.js +6 -0
- package/lib/cli/helpers.d.ts +5 -1
- package/lib/cli/helpers.js +18 -2
- package/lib/methods/index.d.ts +1 -1
- package/lib/methods/index.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +14 -0
- package/lib/methods/mulmo_presentation_style.js +70 -0
- package/lib/methods/mulmo_studio_context.d.ts +14 -0
- package/lib/methods/mulmo_studio_context.js +20 -2
- package/lib/tools/deep_research.d.ts +2 -0
- package/lib/tools/deep_research.js +265 -0
- package/lib/types/schema.d.ts +31 -0
- package/lib/types/schema.js +1 -1
- package/lib/types/type.d.ts +3 -1
- package/lib/utils/ffmpeg_utils.d.ts +1 -0
- package/lib/utils/ffmpeg_utils.js +10 -0
- package/lib/utils/file.d.ts +1 -3
- package/lib/utils/file.js +4 -11
- package/lib/utils/preprocess.js +1 -0
- package/lib/utils/prompt.d.ts +3 -0
- package/lib/utils/prompt.js +52 -0
- package/package.json +4 -3
- package/assets/music/StarsBeyondEx.mp3 +0 -0
package/assets/templates/text_and_image.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "title": "Text and Image",
+  "description": "Template for Text and Image Script.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "scriptName": "image_prompts_template.json"
+}
package/assets/templates/text_only.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "title": "Text Only",
+  "description": "Template for Text Only Script.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "scriptName": "text_only_template.json"
+}
package/lib/actions/audio.d.ts
CHANGED
@@ -1,5 +1,7 @@
 import "dotenv/config";
 import type { CallbackFunction } from "graphai";
-import { MulmoStudioContext } from "../types/index.js";
+import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
+export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
 export declare const audioFilePath: (context: MulmoStudioContext) => string;
+export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
 export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/audio.js
CHANGED
@@ -1,5 +1,6 @@
 import "dotenv/config";
 import { GraphAI } from "graphai";
+import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
 import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
 import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
 import ttsGoogleAgent from "../agents/tts_google_agent.js";
 import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-import {
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { getAudioArtifactFilePath,
+import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -24,7 +25,7 @@ const provider_to_agent = {
   elevenlabs: "ttsElevenlabsAgent",
   mock: "mediaMockAgent",
 };
-const getAudioPath = (context, beat, audioFile, audioDirPath) => {
+const getAudioPath = (context, beat, audioFile) => {
   if (beat.audio?.type === "audio") {
     const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
     if (path) {
@@ -35,34 +36,48 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
   if (beat.text === undefined || beat.text === "") {
     return undefined; // It indicates that the audio is not needed.
   }
-  return
+  return audioFile;
+};
+const getAudioParam = (presentationStyle, beat) => {
+  const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
+  // Use speaker-specific provider if available, otherwise fall back to script-level provider
+  const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+  const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
+  return { voiceId, provider, speechOptions };
+};
+export const getBeatAudioPath = (text, context, beat, lang) => {
+  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+  const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
+  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+  const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
+  const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
+  return getAudioPath(context, beat, audioFile);
 };
 const preprocessor = (namedInputs) => {
-  const { beat, studioBeat, multiLingual, context
-  const { lang } = context;
-  const speaker = context.studio.script.speechParams.speakers[beat.speaker];
-  const voiceId = speaker.voiceId;
-  const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
+  const { beat, studioBeat, multiLingual, context } = namedInputs;
+  const { lang, presentationStyle } = context;
   const text = localizedText(beat, multiLingual, lang);
-
-  const
-  const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
-  const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
-  const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
+  const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+  const audioPath = getBeatAudioPath(text, context, beat, lang);
   studioBeat.audioFile = audioPath;
   const needsTTS = !beat.audio && audioPath !== undefined;
   return {
     ttsAgent: provider_to_agent[provider],
-
+    text,
     voiceId,
     speechOptions,
     audioPath,
-
+    studioBeat,
     needsTTS,
   };
 };
 const graph_tts = {
   nodes: {
+    beat: {},
+    studioBeat: {},
+    multiLingual: {},
+    context: {},
+    __mapIndex: {},
     preprocessor: {
       agent: preprocessor,
       inputs: {
@@ -70,7 +85,6 @@ const graph_tts = {
         studioBeat: ":studioBeat",
         multiLingual: ":multiLingual",
         context: ":context",
-        audioDirPath: ":audioDirPath",
       },
     },
     tts: {
@@ -100,8 +114,6 @@ const graph_data = {
     audioArtifactFilePath: {},
     audioCombinedFilePath: {},
     outputStudioFilePath: {},
-    audioDirPath: {},
-    audioSegmentDirPath: {},
     musicFile: {},
     map: {
       agent: "mapAgent",
@@ -109,8 +121,6 @@
         rows: ":context.studio.script.beats",
         studioBeat: ":context.studio.beats",
        multiLingual: ":context.studio.multiLingual",
-        audioDirPath: ":audioDirPath",
-        audioSegmentDirPath: ":audioSegmentDirPath",
        context: ":context",
       },
       params: {
@@ -122,7 +132,7 @@
     combineFiles: {
       agent: "combineAudioFilesAgent",
       inputs: {
-
+        onComplete: ":map",
         context: ":context",
         combinedFileName: ":audioCombinedFilePath",
       },
@@ -141,7 +151,7 @@
         wait: ":combineFiles",
         voiceFile: ":audioCombinedFilePath",
         outputFile: ":audioArtifactFilePath",
-
+        context: ":context",
         params: {
           musicFile: ":musicFile",
         },
@@ -172,40 +182,68 @@ export const audioFilePath = (context) => {
   const { outDirPath } = fileDirs;
   return getAudioArtifactFilePath(outDirPath, studio.filename);
 };
+const getConcurrency = (context) => {
+  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+  const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
+    const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
+    return provider === "nijivoice" || provider === "elevenlabs";
+  });
+  return hasLimitedConcurrencyProvider ? 1 : 8;
+};
+const audioAgents = {
+  ...vanillaAgents,
+  fileWriteAgent,
+  ttsOpenaiAgent,
+  ttsNijivoiceAgent,
+  ttsGoogleAgent,
+  ttsElevenlabsAgent,
+  addBGMAgent,
+  combineAudioFilesAgent,
+};
+export const generateBeatAudio = async (index, context, callbacks) => {
+  try {
+    MulmoStudioContextMethods.setSessionState(context, "audio", true);
+    const { studio, fileDirs } = context;
+    const { outDirPath, audioDirPath } = fileDirs;
+    const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+    mkdir(outDirPath);
+    mkdir(audioSegmentDirPath);
+    const taskManager = new TaskManager(getConcurrency(context));
+    const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+    graph.injectValue("__mapIndex", index);
+    graph.injectValue("beat", context.studio.script.beats[index]);
+    graph.injectValue("studioBeat", context.studio.beats[index]);
+    graph.injectValue("multiLingual", context.studio.multiLingual);
+    graph.injectValue("context", context);
+    if (callbacks) {
+      callbacks.forEach((callback) => {
+        graph.registerCallback(callback);
+      });
+    }
+    await graph.run();
+  }
+  finally {
+    MulmoStudioContextMethods.setSessionState(context, "audio", false);
+  }
+};
 export const audio = async (context, callbacks) => {
   try {
     MulmoStudioContextMethods.setSessionState(context, "audio", true);
     const { studio, fileDirs, lang } = context;
     const { outDirPath, audioDirPath } = fileDirs;
     const audioArtifactFilePath = audioFilePath(context);
-    const audioSegmentDirPath =
-    const audioCombinedFilePath =
+    const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+    const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
     mkdir(outDirPath);
     mkdir(audioSegmentDirPath);
-
-    const
-    const provider = speaker.provider ?? studio.script.speechParams.provider;
-    return provider === "nijivoice" || provider === "elevenlabs";
-    });
-    graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
-    const graph = new GraphAI(graph_data, {
-      ...vanillaAgents,
-      fileWriteAgent,
-      ttsOpenaiAgent,
-      ttsNijivoiceAgent,
-      ttsGoogleAgent,
-      ttsElevenlabsAgent,
-      addBGMAgent,
-      combineAudioFilesAgent,
-    }, { agentFilters });
+    const taskManager = new TaskManager(getConcurrency(context));
+    const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
     graph.injectValue("context", context);
     graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
     graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-    graph.injectValue("
-    graph.injectValue("audioDirPath", audioDirPath);
-    graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
+    graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
     if (callbacks) {
       callbacks.forEach((callback) => {
         graph.registerCallback(callback);
package/lib/actions/captions.js
CHANGED
@@ -26,7 +26,7 @@ const graph_data = {
   const { fileDirs } = namedInputs.context;
   const { caption } = context;
   const { imageDirPath } = fileDirs;
-  const { canvasSize } = context.
+  const { canvasSize } = context.presentationStyle;
   const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
   const template = getHTMLFile("caption");
   const text = (() => {
package/lib/actions/images.d.ts
CHANGED
@@ -32,8 +32,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
   };
   movieFile: string | undefined;
   imagePath: string | undefined;
+  referenceImage: string | undefined;
 } | {
+  imagePath: string;
   images: string[];
+  imageFromMovie: boolean;
   imageParams: {
     model?: string | undefined;
     style?: string | undefined;
@@ -81,6 +84,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
   };
   movieFile: string | undefined;
   imagePath: string;
+  referenceImage: string;
   prompt: string;
 }>;
 export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js
CHANGED
@@ -1,22 +1,24 @@
 import dotenv from "dotenv";
 import fs from "fs";
 import { GraphAI, GraphAILogger } from "graphai";
+import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
-import {
+import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { imagePlugins } from "../utils/image_plugins/index.js";
 import { imagePrompt } from "../utils/prompt.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
 // const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
-
+import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
+const htmlStyle = (context, beat) => {
   return {
-    canvasSize:
-    textSlideStyle:
+    canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
+    textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
   };
 };
 export const imagePreprocessAgent = async (namedInputs) => {
@@ -32,10 +34,10 @@ export const imagePreprocessAgent = async (namedInputs) => {
   if (plugin) {
     try {
       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
-      const processorParams = { beat, context, imagePath, ...htmlStyle(context
+      const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
       const path = await plugin.process(processorParams);
       // undefined prompt indicates that image generation is not needed
-      return { imagePath: path, ...returnValue };
+      return { imagePath: path, referenceImage: path, ...returnValue };
     }
     finally {
       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
@@ -49,10 +51,10 @@
     return sources.filter((source) => source !== undefined);
   })();
   if (beat.moviePrompt && !beat.imagePrompt) {
-    return { ...returnValue, images }; // no image prompt, only movie prompt
+    return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
   }
   const prompt = imagePrompt(beat, imageParams.style);
-  return { imagePath, prompt, ...returnValue, images };
+  return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
 };
 const beat_graph_data = {
   version: 0.5,
@@ -93,7 +95,7 @@ const beat_graph_data = {
       params: {
         model: ":preprocessor.imageParams.model",
         moderation: ":preprocessor.imageParams.moderation",
-        canvasSize: ":context.
+        canvasSize: ":context.presentationStyle.canvasSize",
       },
     },
     defaultValue: {},
@@ -104,24 +106,37 @@
       inputs: {
         onComplete: ":imageGenerator", // to wait for imageGenerator to finish
         prompt: ":beat.moviePrompt",
-        imagePath: ":preprocessor.
+        imagePath: ":preprocessor.referenceImage",
        file: ":preprocessor.movieFile",
        studio: ":context.studio", // for cache
        mulmoContext: ":context", // for fileCacheAgentFilter
        index: ":__mapIndex", // for cache
        sessionType: "movie", // for cache
        params: {
-          model: ":context.
+          model: ":context.presentationStyle.movieParams.model",
          duration: ":beat.duration",
-          canvasSize: ":context.
+          canvasSize: ":context.presentationStyle.canvasSize",
        },
      },
      defaultValue: {},
    },
+    imageFromMovie: {
+      if: ":preprocessor.imageFromMovie",
+      agent: async (namedInputs) => {
+        await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
+        return { generatedImage: true };
+      },
+      inputs: {
+        onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+        imageFile: ":preprocessor.imagePath",
+        movieFile: ":preprocessor.movieFile",
+      },
+      defaultValue: { generatedImage: false },
+    },
    output: {
      agent: "copyAgent",
      inputs: {
-        onComplete: ":
+        onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
        imageFile: ":preprocessor.imagePath",
        movieFile: ":preprocessor.movieFile",
      },
@@ -217,7 +232,6 @@ const googleAuth = async () => {
   }
 };
 const graphOption = async (context) => {
-  const { studio } = context;
   const agentFilters = [
     {
       name: "fileCacheAgentFilter",
@@ -225,12 +239,14 @@ const graphOption = async (context) => {
       nodeIds: ["imageGenerator", "movieGenerator"],
     },
   ];
+  const taskManager = new TaskManager(getConcurrency(context));
   const options = {
     agentFilters,
+    taskManager,
   };
-  const imageAgentInfo =
+  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
   // We need to get google's auth token only if the google is the text2image provider.
-  if (imageAgentInfo.provider === "google" ||
+  if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
     GraphAILogger.log("google was specified as text2image engine");
     const token = await googleAuth();
     options.config = {
@@ -250,9 +266,9 @@ const prepareGenerateImages = async (context) => {
   const { studio, fileDirs } = context;
   const { outDirPath, imageDirPath } = fileDirs;
   mkdir(`${imageDirPath}/${studio.filename}`);
-  const imageAgentInfo =
+  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
   const imageRefs = {};
-  const images =
+  const images = context.presentationStyle.imageParams?.images;
   if (images) {
     await Promise.all(Object.keys(images).map(async (key) => {
       const image = images[key];
@@ -302,14 +318,17 @@
   };
   return injections;
 };
-const
-const imageAgentInfo =
+const getConcurrency = (context) => {
+  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
   if (imageAgentInfo.provider === "openai") {
     // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
     // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
     // gpt-image-1:3,000,000 TPM、150 images per minute
-
+    return imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
   }
+  return 4;
+};
+const generateImages = async (context, callbacks) => {
   const options = await graphOption(context);
   const injections = await prepareGenerateImages(context);
   const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
package/lib/actions/movie.js
CHANGED
@@ -1,6 +1,6 @@
 import { GraphAILogger, assert } from "graphai";
 import { mulmoTransitionSchema } from "../types/index.js";
-import {
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -59,22 +59,22 @@ const getOutputOption = (audioId, videoId) => {
     "-b:a 128k", // Audio bitrate
   ];
 };
-const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
+const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
   const start = performance.now();
   const ffmpegContext = FfmpegContextInit();
-  const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+  const missingIndex = context.studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
   if (missingIndex !== -1) {
     GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
     return false;
   }
-  const canvasInfo =
+  const canvasInfo = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
   // Add each image input
   const filterComplexVideoIds = [];
   const filterComplexAudioIds = [];
   const transitionVideoIds = [];
   const beatTimestamps = [];
-  studio.beats.reduce((timestamp, studioBeat, index) => {
-    const beat = studio.script.beats[index];
+  context.studio.beats.reduce((timestamp, studioBeat, index) => {
+    const beat = context.studio.script.beats[index];
     const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
     if (!sourceFile) {
       throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
@@ -83,14 +83,14 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
       throw new Error(`studioBeat.duration is not set: index=${index}`);
     }
     const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
-    const mediaType = studioBeat.movieFile ? "movie" :
+    const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
     const extraPadding = (() => {
       // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
       if (index === 0) {
-        return
+        return context.presentationStyle.audioParams.introPadding;
       }
-      else if (index === studio.beats.length - 1) {
-        return
+      else if (index === context.studio.beats.length - 1) {
+        return context.presentationStyle.audioParams.outroPadding;
       }
       return 0;
     })();
@@ -106,7 +106,7 @@
     else {
       filterComplexVideoIds.push(videoId);
     }
-    if (
+    if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
       const sourceId = filterComplexVideoIds.pop();
       ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
       filterComplexVideoIds.push(`${sourceId}_0`);
@@ -127,16 +127,16 @@
     beatTimestamps.push(timestamp);
     return timestamp + duration;
   }, 0);
-  assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
-  assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
+  assert(filterComplexVideoIds.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
+  assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
   // console.log("*** images", images.audioIds);
   // Concatenate the trimmed images
   const concatVideoId = "concat_video";
-  ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+  ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${context.studio.beats.length}:v=1:a=0[${concatVideoId}]`);
   // Add tranditions if needed
   const mixedVideoId = (() => {
-    if (
-      const transition = mulmoTransitionSchema.parse(
+    if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
+      const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
       return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
         const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
         const processedVideoId = `${transitionVideoId}_f`;
@@ -166,8 +166,8 @@
   await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
   const end = performance.now();
   GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
-  GraphAILogger.info(studio.script.title);
-  GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+  GraphAILogger.info(context.studio.script.title);
+  GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
   return true;
 };
 export const movieFilePath = (context) => {
@@ -181,7 +181,7 @@ export const movie = async (context) => {
   const { outDirPath } = fileDirs;
   const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
   const outputVideoPath = movieFilePath(context);
-  if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+  if (await createVideo(audioArtifactFilePath, outputVideoPath, context, caption)) {
     writingMessage(outputVideoPath);
   }
 }
package/lib/actions/pdf.js
CHANGED
@@ -1,7 +1,7 @@
 import fs from "fs";
 import path from "path";
 import puppeteer from "puppeteer";
-import {
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { localizedText, isHttp } from "../utils/utils.js";
 import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
 import { interpolate } from "../utils/markdown.js";
@@ -97,7 +97,7 @@ const getHandoutTemplateData = (isLandscapeImage) => ({
 const generatePDFHTML = async (context, pdfMode, pdfSize) => {
   const { studio, lang = "en" } = context;
   const { multiLingual } = studio;
-  const { width: imageWidth, height: imageHeight } =
+  const { width: imageWidth, height: imageHeight } = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
   const isLandscapeImage = imageWidth > imageHeight;
   const imagePaths = studio.beats.map((beat) => beat.imageFile);
   const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
package/lib/agents/add_bgm_agent.js
CHANGED
@@ -1,11 +1,11 @@
 import { GraphAILogger } from "graphai";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 const addBGMAgent = async ({ namedInputs, params, }) => {
-  const { voiceFile, outputFile,
+  const { voiceFile, outputFile, context } = namedInputs;
   const { musicFile } = params;
   const speechDuration = await ffmpegGetMediaDuration(voiceFile);
-  const introPadding =
-  const outroPadding =
+  const introPadding = context.presentationStyle.audioParams.introPadding;
+  const outroPadding = context.presentationStyle.audioParams.outroPadding;
   const totalDuration = speechDuration + introPadding + outroPadding;
   GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
   const ffmpegContext = FfmpegContextInit();

package/lib/agents/combine_audio_files_agent.js
CHANGED
@@ -27,7 +27,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
     if (index === context.studio.beats.length - 1) {
       return 0;
     }
-    return isClosingGap ? context.
+    return isClosingGap ? context.presentationStyle.audioParams.closingPadding : context.presentationStyle.audioParams.padding;
   })();
   const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
   const totalPadding = await (async () => {
package/lib/agents/index.d.ts
CHANGED
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
 import combineAudioFilesAgent from "./combine_audio_files_agent.js";
 import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
+import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
@@ -12,4 +13,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js
CHANGED
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
 import combineAudioFilesAgent from "./combine_audio_files_agent.js";
 import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
+import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
@@ -13,4 +14,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };