mulmocast 1.2.23 → 1.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
  {
- "title": "Ghibli-style comic strips with nano banana.",
+ "title": "Ghibli-style comic strips",
  "description": "Ghibli-style comic strips with nano banana.",
  "systemPrompt": "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
@@ -0,0 +1,6 @@
+ {
+ "title": "Business Analysis",
+ "description": "Template for business analysis presentation.",
+ "systemPrompt": "First, determine a set of slides (=beats) to present, and choose an appropriate style for each beat (from the JSON template blow) and add required data for it. For each beat, put an appropriate text to the text field for the presenter to read for that slide in details. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "scriptName": "vision.json"
+ }
@@ -1,7 +1,9 @@
  import "dotenv/config";
+ import type { GraphData } from "graphai";
  import { MulmoStudioContext, MulmoBeat, PublicAPIArgs } from "../types/index.js";
  export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
  export declare const listLocalizedAudioPaths: (context: MulmoStudioContext) => (string | undefined)[];
+ export declare const audio_graph_data: GraphData;
  export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, args?: PublicAPIArgs & {
  langs: string[];
  }) => Promise<void>;
@@ -141,7 +141,7 @@ const graph_tts_map = {
  },
  },
  };
- const graph_data = {
+ export const audio_graph_data = {
  version: 0.5,
  concurrency: 8,
  nodes: {
@@ -276,7 +276,7 @@ export const audio = async (context, args) => {
  mkdir(audioSegmentDirPath);
  const config = settings2GraphAIConfig(settings, process.env);
  const taskManager = new TaskManager(getConcurrency(context));
- const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
+ const graph = new GraphAI(audio_graph_data, audioAgents, { agentFilters, taskManager, config });
  graph.injectValue("context", context);
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
@@ -1,2 +1,4 @@
  import { MulmoStudioContext, PublicAPIArgs } from "../types/index.js";
+ import type { GraphData } from "graphai";
+ export declare const caption_graph_data: GraphData;
  export declare const captions: (context: MulmoStudioContext, args?: PublicAPIArgs) => Promise<MulmoStudioContext>;
@@ -7,7 +7,7 @@ import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
  import { MulmoStudioContextMethods, MulmoPresentationStyleMethods } from "../methods/index.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  const vanillaAgents = agents.default ?? agents;
- const graph_data = {
+ export const caption_graph_data = {
  version: 0.5,
  nodes: {
  context: {},
@@ -74,7 +74,7 @@ export const captions = async (context, args) => {
  if (MulmoStudioContextMethods.getCaption(context)) {
  try {
  MulmoStudioContextMethods.setSessionState(context, "caption", true);
- const graph = new GraphAI(graph_data, { ...vanillaAgents, fileWriteAgent });
+ const graph = new GraphAI(caption_graph_data, { ...vanillaAgents, fileWriteAgent });
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
  const fileName = MulmoStudioContextMethods.getFileName(context);
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
@@ -1,5 +1,396 @@
- import type { GraphOptions } from "graphai";
- import { MulmoStudioContext, PublicAPIArgs } from "../types/index.js";
+ import type { GraphOptions, GraphData } from "graphai";
+ import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
+ export declare const beat_graph_data: {
+ version: number;
+ concurrency: number;
+ nodes: {
+ context: {};
+ htmlImageAgentInfo: {};
+ imageRefs: {};
+ beat: {};
+ __mapIndex: {};
+ forceMovie: {
+ value: boolean;
+ };
+ forceImage: {
+ value: boolean;
+ };
+ forceLipSync: {
+ value: boolean;
+ };
+ forceSoundEffect: {
+ value: boolean;
+ };
+ preprocessor: {
+ agent: (namedInputs: {
+ context: MulmoStudioContext;
+ beat: import("../types/type.js").MulmoBeat;
+ index: number;
+ imageRefs: Record<string, string>;
+ }) => Promise<{
+ imagePath: string;
+ htmlPrompt: string | undefined;
+ htmlImageFile: string;
+ htmlPath: string;
+ htmlImageSystemPrompt: string;
+ } | {
+ imagePath: string | undefined;
+ referenceImageForMovie: string | undefined;
+ imageParams: MulmoImageParams;
+ movieFile: string | undefined;
+ soundEffectFile?: string;
+ soundEffectPrompt?: string;
+ soundEffectModel?: string;
+ soundEffectAgentInfo?: {
+ agentName: string;
+ defaultModel: string;
+ };
+ lipSyncFile?: string;
+ lipSyncModel?: string;
+ lipSyncAgentName?: string;
+ lipSyncTrimAudio?: boolean;
+ bgmFile?: string | null;
+ startAt?: number;
+ duration?: number;
+ audioFile?: string;
+ beatDuration?: number;
+ htmlPrompt?: undefined;
+ htmlImageFile?: undefined;
+ htmlPath?: undefined;
+ htmlImageSystemPrompt?: undefined;
+ } | {
+ imagePath: string;
+ imageFromMovie: boolean;
+ movieAgentInfo: {
+ agent: string;
+ movieParams: {
+ speed?: number | undefined;
+ provider?: string | undefined;
+ model?: string | undefined;
+ fillOption?: {
+ style: "aspectFit" | "aspectFill";
+ } | undefined;
+ transition?: {
+ type: "fade" | "slideout_left";
+ duration: number;
+ } | undefined;
+ };
+ };
+ imageParams: MulmoImageParams;
+ movieFile: string | undefined;
+ soundEffectFile?: string;
+ soundEffectPrompt?: string;
+ soundEffectModel?: string;
+ soundEffectAgentInfo?: {
+ agentName: string;
+ defaultModel: string;
+ };
+ lipSyncFile?: string;
+ lipSyncModel?: string;
+ lipSyncAgentName?: string;
+ lipSyncTrimAudio?: boolean;
+ bgmFile?: string | null;
+ startAt?: number;
+ duration?: number;
+ audioFile?: string;
+ beatDuration?: number;
+ htmlPrompt?: undefined;
+ htmlImageFile?: undefined;
+ htmlPath?: undefined;
+ htmlImageSystemPrompt?: undefined;
+ } | {
+ imagePath: string;
+ referenceImageForMovie: string;
+ imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
+ prompt: string;
+ referenceImages: string[];
+ movieAgentInfo: {
+ agent: string;
+ movieParams: {
+ speed?: number | undefined;
+ provider?: string | undefined;
+ model?: string | undefined;
+ fillOption?: {
+ style: "aspectFit" | "aspectFill";
+ } | undefined;
+ transition?: {
+ type: "fade" | "slideout_left";
+ duration: number;
+ } | undefined;
+ };
+ };
+ imageParams: MulmoImageParams;
+ movieFile: string | undefined;
+ soundEffectFile?: string;
+ soundEffectPrompt?: string;
+ soundEffectModel?: string;
+ soundEffectAgentInfo?: {
+ agentName: string;
+ defaultModel: string;
+ };
+ lipSyncFile?: string;
+ lipSyncModel?: string;
+ lipSyncAgentName?: string;
+ lipSyncTrimAudio?: boolean;
+ bgmFile?: string | null;
+ startAt?: number;
+ duration?: number;
+ audioFile?: string;
+ beatDuration?: number;
+ htmlPrompt?: undefined;
+ htmlImageFile?: undefined;
+ htmlPath?: undefined;
+ htmlImageSystemPrompt?: undefined;
+ }>;
+ inputs: {
+ context: string;
+ beat: string;
+ index: string;
+ imageRefs: string;
+ };
+ };
+ imagePlugin: {
+ if: string;
+ defaultValue: {};
+ agent: (namedInputs: {
+ context: MulmoStudioContext;
+ beat: import("../types/type.js").MulmoBeat;
+ index: number;
+ }) => Promise<void>;
+ inputs: {
+ context: string;
+ beat: string;
+ index: string;
+ onComplete: string[];
+ };
+ };
+ htmlImageAgent: {
+ if: string;
+ defaultValue: {};
+ agent: string;
+ inputs: {
+ media: string;
+ prompt: string;
+ system: string;
+ params: {
+ model: string;
+ max_tokens: string;
+ };
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ mulmoContext: string;
+ sessionType: string;
+ };
+ };
+ };
+ htmlReader: {
+ if: string;
+ agent: (namedInputs: {
+ htmlPath: string;
+ }) => Promise<{
+ html: string;
+ }>;
+ inputs: {
+ onComplete: string[];
+ htmlPath: string;
+ };
+ output: {
+ htmlText: string;
+ };
+ defaultValue: {};
+ };
+ htmlImageGenerator: {
+ if: string;
+ defaultValue: {};
+ agent: (namedInputs: {
+ file: string;
+ canvasSize: import("../types/type.js").MulmoCanvasDimension;
+ htmlText: string;
+ }) => Promise<void>;
+ inputs: {
+ htmlText: string;
+ canvasSize: string;
+ file: string;
+ };
+ };
+ imageGenerator: {
+ if: string;
+ agent: string;
+ retry: number;
+ inputs: {
+ media: string;
+ prompt: string;
+ referenceImages: string;
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ mulmoContext: string;
+ sessionType: string;
+ };
+ params: {
+ model: string;
+ moderation: string;
+ canvasSize: string;
+ quality: string;
+ };
+ };
+ defaultValue: {};
+ };
+ movieGenerator: {
+ if: string;
+ agent: string;
+ inputs: {
+ media: string;
+ onComplete: string[];
+ prompt: string;
+ imagePath: string;
+ movieFile: string;
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ sessionType: string;
+ mulmoContext: string;
+ };
+ params: {
+ model: string;
+ duration: string;
+ canvasSize: string;
+ };
+ };
+ defaultValue: {};
+ };
+ imageFromMovie: {
+ if: string;
+ agent: (namedInputs: {
+ movieFile: string;
+ imageFile: string;
+ }) => Promise<object>;
+ inputs: {
+ onComplete: string[];
+ imageFile: string;
+ movieFile: string;
+ };
+ defaultValue: {};
+ };
+ audioChecker: {
+ agent: (namedInputs: {
+ movieFile: string;
+ imageFile: string;
+ soundEffectFile: string;
+ }) => Promise<{
+ hasMovieAudio: boolean;
+ }>;
+ inputs: {
+ onComplete: string[];
+ movieFile: string;
+ imageFile: string;
+ soundEffectFile: string;
+ };
+ };
+ soundEffectGenerator: {
+ if: string;
+ agent: string;
+ inputs: {
+ onComplete: string[];
+ prompt: string;
+ movieFile: string;
+ soundEffectFile: string;
+ params: {
+ model: string;
+ duration: string;
+ };
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ sessionType: string;
+ mulmoContext: string;
+ };
+ };
+ defaultValue: {};
+ };
+ AudioTrimmer: {
+ if: string;
+ agent: (namedInputs: {
+ audioFile: string;
+ bgmFile: string;
+ startAt: number;
+ duration: number;
+ }) => Promise<{
+ buffer: Buffer<ArrayBufferLike>;
+ }>;
+ inputs: {
+ onComplete: string[];
+ audioFile: string;
+ bgmFile: string;
+ startAt: string;
+ duration: string;
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ mulmoContext: string;
+ };
+ };
+ defaultValue: {};
+ };
+ lipSyncGenerator: {
+ if: string;
+ agent: string;
+ inputs: {
+ onComplete: string[];
+ movieFile: string;
+ imageFile: string;
+ audioFile: string;
+ lipSyncFile: string;
+ params: {
+ model: string;
+ duration: string;
+ };
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ sessionType: string;
+ mulmoContext: string;
+ };
+ };
+ defaultValue: {};
+ };
+ output: {
+ agent: string;
+ inputs: {
+ onComplete: string[];
+ imageFile: string;
+ movieFile: string;
+ soundEffectFile: string;
+ lipSyncFile: string;
+ hasMovieAudio: string;
+ htmlImageFile: string;
+ };
+ output: {
+ imageFile: string;
+ movieFile: string;
+ soundEffectFile: string;
+ lipSyncFile: string;
+ hasMovieAudio: string;
+ htmlImageFile: string;
+ };
+ isResult: boolean;
+ };
+ };
+ };
+ export declare const images_graph_data: GraphData;
  export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
  type ImageOptions = {
  imageAgents: Record<string, unknown>;
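The declaration above spells out beat_graph_data as a literal object type, exposing every node of the per-beat pipeline, and adds images_graph_data as a plain GraphData. A hedged sketch of what the new exports enable (the import path is assumed, not confirmed by the diff):

// Hypothetical: enumerate the beat graph's nodes via the new typed export.
import { beat_graph_data, images_graph_data } from "mulmocast";

// The node names come straight from the declaration above.
const beatNodes = Object.keys(beat_graph_data.nodes);
// ["context", "htmlImageAgentInfo", "imageRefs", "beat", "__mapIndex", ...]
console.log(beatNodes, images_graph_data.version);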
@@ -41,7 +41,7 @@ const defaultAgents = {
  anthropicAgent,
  };
  dotenv.config();
- const beat_graph_data = {
+ export const beat_graph_data = {
  version: 0.5,
  concurrency: 4,
  nodes: {
@@ -297,7 +297,7 @@ const beat_graph_data = {
  },
  },
  };
- const graph_data = {
+ export const images_graph_data = {
  version: 0.5,
  concurrency: 4,
  nodes: {
@@ -403,7 +403,7 @@ const generateImages = async (context, args) => {
  ...defaultAgents,
  ...optionImageAgents,
  };
- const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
+ const graph = new GraphAI(images_graph_data, graphaiAgent, await graphOption(context, settings));
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
@@ -1,8 +1,8 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./captions.js";
  export * from "./image_references.js";
  export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
- export * from "./captions.js";
@@ -1,8 +1,8 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./captions.js";
  export * from "./image_references.js";
  export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
- export * from "./captions.js";
@@ -1,4 +1,5 @@
  import "dotenv/config";
+ import type { GraphData } from "graphai";
  import type { LANG, MulmoStudioContext, PublicAPIArgs } from "../types/index.js";
  export declare const translateTextGraph: {
  version: number;
@@ -45,6 +46,7 @@ export declare const translateTextGraph: {
  };
  };
  };
+ export declare const translate_graph_data: GraphData;
  export declare const getOutputMultilingualFilePathAndMkdir: (context: MulmoStudioContext) => {
  outputMultilingualFilePath: string;
  outDirPath: string;
@@ -132,7 +132,7 @@ const beatGraph = {
  },
  },
  };
- const translateGraph = {
+ export const translate_graph_data = {
  version: 0.5,
  nodes: {
  context: {},
@@ -276,7 +276,7 @@ export const translate = async (context, args) => {
  : [...new Set([context.lang, context.studio.script.captionParams?.lang].filter((x) => !isNull(x)))];
  const config = settings2GraphAIConfig(settings, process.env);
  assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
- const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
+ const graph = new GraphAI(translate_graph_data, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
  graph.injectValue("context", context);
  graph.injectValue("targetLangs", targetLangs);
  graph.injectValue("outDirPath", outDirPath);
@@ -1,8 +1,19 @@
  import fs from "fs";
  import { GraphAILogger } from "graphai";
- import { getAspectRatio } from "./movie_google_agent.js";
  import { provider2ImageAgent } from "../utils/provider2agent.js";
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
+ import { blankImagePath, blankSquareImagePath, blankVerticalImagePath } from "../utils/file.js";
+ const getAspectRatio = (canvasSize) => {
+ if (canvasSize.width > canvasSize.height) {
+ return "16:9";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "9:16";
+ }
+ else {
+ return "1:1";
+ }
+ };
  export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
  const { prompt, referenceImages } = namedInputs;
  const aspectRatio = getAspectRatio(params.canvasSize);
@@ -15,12 +26,22 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
  const ai = new GoogleGenAI({ apiKey });
  if (model === "gemini-2.5-flash-image-preview") {
  const contents = [{ text: prompt }];
- referenceImages?.forEach((imagePath) => {
+ const images = [...(referenceImages ?? [])];
+ // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
+ if (aspectRatio === "9:16") {
+ images.push(blankVerticalImagePath());
+ }
+ else if (aspectRatio === "1:1") {
+ images.push(blankSquareImagePath());
+ }
+ else {
+ images.push(blankImagePath());
+ }
+ images.forEach((imagePath) => {
  const imageData = fs.readFileSync(imagePath);
  const base64Image = imageData.toString("base64");
  contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
  });
- // NOTE: There is no way to specify the aspect ratio for Gemini.
  const response = await ai.models.generateContent({ model, contents });
  if (!response.candidates?.[0]?.content?.parts) {
  throw new Error("ERROR: generateContent returned no candidates");
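Two things happen in the two hunks above: getAspectRatio is inlined (it was previously imported from movie_google_agent.js), and the agent now appends a blank image with the target proportions to the reference images, since Gemini's image model takes no aspect-ratio parameter and a reference image can only hint at the desired shape. A standalone sketch of the heuristic (the CanvasSize shape is assumed from the surrounding code):

// Map canvas dimensions to the coarse aspect ratio used to pick a blank hint image.
type CanvasSize = { width: number; height: number };

const getAspectRatio = (canvasSize: CanvasSize): "16:9" | "9:16" | "1:1" => {
  if (canvasSize.width > canvasSize.height) return "16:9";
  if (canvasSize.width < canvasSize.height) return "9:16";
  return "1:1";
};

console.log(getAspectRatio({ width: 1080, height: 1920 })); // "9:16" → blankVerticalImagePath()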
@@ -33,6 +33,9 @@ export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
  },
  image: undefined,
  };
+ if (model === "veo-3.0-generate-preview") {
+ payload.config.durationSeconds = undefined;
+ }
  if (imagePath) {
  const buffer = readFileSync(imagePath);
  const imageBytes = buffer.toString("base64");
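The new guard appears to exist because the veo-3.0-generate-preview model does not accept an explicit clip duration; clearing durationSeconds lets the model fall back to its default length (the diff does not state the reason). A minimal sketch of the same defensive pattern (the payload shape is inferred from the surrounding hunk):

// Strip request fields that a specific model variant rejects.
const model: string = "veo-3.0-generate-preview"; // assumed to come from params
const payload: { config: { durationSeconds?: number } } = {
  config: { durationSeconds: 8 },
};
if (model === "veo-3.0-generate-preview") {
  payload.config.durationSeconds = undefined; // model chooses its own duration
}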
@@ -430,7 +430,7 @@ export const promptTemplates = [
  },
  scriptName: "text_only_template.json",
  systemPrompt: "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
- title: "Ghibli-style comic strips with nano banana.",
+ title: "Ghibli-style comic strips",
  },
  {
  description: "Template for Ghost in the shell style comic presentation.",
@@ -908,4 +908,11 @@ export const promptTemplates = [
  systemPrompt: "This script is for a movie trailer. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
  title: "Movie Trailer template",
  },
+ {
+ description: "Template for business analysis presentation.",
+ filename: "vision",
+ scriptName: "vision.json",
+ systemPrompt: "First, determine a set of slides (=beats) to present, and choose an appropriate style for each beat (from the JSON template blow) and add required data for it. For each beat, put an appropriate text to the text field for the presenter to read for that slide in details. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ title: "Business Analysis",
+ },
  ];