npm - mulmocast - Versions diffs - 0.1.2 → 0.1.4 - Mend

mulmocast 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

package/assets/templates/characters.json +16 -0
package/assets/templates/html.json +6 -0
package/lib/actions/audio.js +13 -19
package/lib/actions/image_agents.d.ts +145 -0
package/lib/actions/image_agents.js +59 -0
package/lib/actions/image_references.d.ts +9 -0
package/lib/actions/image_references.js +79 -0
package/lib/actions/images.d.ts +17 -109
package/lib/actions/images.js +83 -188
package/lib/actions/index.d.ts +2 -0
package/lib/actions/index.js +2 -0
package/lib/actions/movie.js +3 -1
package/lib/actions/pdf.js +5 -2
package/lib/agents/image_google_agent.d.ts +2 -15
package/lib/agents/image_google_agent.js +5 -5
package/lib/agents/image_openai_agent.d.ts +2 -17
package/lib/agents/image_openai_agent.js +9 -9
package/lib/agents/movie_google_agent.d.ts +2 -17
package/lib/agents/movie_google_agent.js +7 -7
package/lib/agents/movie_replicate_agent.d.ts +2 -16
package/lib/agents/movie_replicate_agent.js +4 -4
package/lib/agents/tts_google_agent.d.ts +9 -1
package/lib/agents/tts_google_agent.js +2 -2
package/lib/agents/tts_nijivoice_agent.js +1 -1
package/lib/agents/tts_openai_agent.d.ts +13 -1
package/lib/agents/tts_openai_agent.js +2 -2
package/lib/cli/helpers.js +7 -7
package/lib/index.d.ts +1 -0
package/lib/index.js +1 -0
package/lib/methods/index.d.ts +1 -0
package/lib/methods/index.js +1 -0
package/lib/methods/mulmo_beat.d.ts +6 -0
package/lib/methods/mulmo_beat.js +21 -0
package/lib/methods/mulmo_presentation_style.d.ts +3 -1
package/lib/methods/mulmo_presentation_style.js +31 -7
package/lib/methods/mulmo_studio_context.js +3 -0
package/lib/tools/story_to_script.js +2 -2
package/lib/types/agent.d.ts +55 -0
package/lib/types/agent.js +3 -0
package/lib/types/schema.d.ts +560 -296
package/lib/types/schema.js +19 -10
package/lib/types/type.d.ts +3 -2
package/lib/utils/const.d.ts +0 -1
package/lib/utils/const.js +0 -1
package/lib/utils/context.d.ts +24 -13
package/lib/utils/context.js +1 -0
package/lib/utils/ffmpeg_utils.d.ts +1 -1
package/lib/utils/ffmpeg_utils.js +1 -1
package/lib/utils/file.js +4 -4
package/lib/utils/filters.js +3 -4
package/lib/utils/markdown.js +1 -1
package/lib/utils/preprocess.d.ts +15 -8
package/lib/utils/provider2agent.d.ts +72 -0
package/lib/utils/provider2agent.js +81 -0
package/lib/utils/string.js +5 -5
package/lib/utils/utils.d.ts +13 -11
package/lib/utils/utils.js +56 -62
package/package.json +7 -6
package/scripts/templates/html.json +42 -0
package/scripts/templates/image_refs.json +35 -0

package/assets/templates/characters.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "title": "Story with multiple characters",
+  "description": "Template for story with multiple characters.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    }
+  },
+  "systemPrompt": "Generate a script for a the given story with multiple characters. Generate image prompts for each character, and make references to them in the beats. Use the JSON below as a template.",
+  "scriptName": "image_refs.json"
+}

package/assets/templates/html.json ADDED Viewed

@@ -0,0 +1,6 @@
+{
+  "title": "Business presentation in HTML",
+  "description": "Template for business presentation in HTML.",
+  "systemPrompt": "Generate a script for a business presentation of the given topic. Another LLM will generate actual slides from the prompt and data for each beat. Adding optional data would help it to generate more compelling slide. Mention the reference in one of beats, if it exists. The valid type of reference is 'article', 'paper', 'image', 'video', 'audio'. Use the JSON below as a template.",
+  "scriptName": "html.json"
+}

package/lib/actions/audio.js CHANGED Viewed

@@ -12,18 +12,10 @@ import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
 import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
+import { provider2TTSAgent } from "../utils/provider2agent.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
 const vanillaAgents = agents.default ?? agents;
-// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
-// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
-const provider_to_agent = {
-    nijivoice: "ttsNijivoiceAgent",
-    openai: "ttsOpenaiAgent",
-    google: "ttsGoogleAgent",
-    elevenlabs: "ttsElevenlabsAgent",
-    mock: "mediaMockAgent",
-};
 const getAudioPath = (context, beat, audioFile) => {
     if (beat.audio?.type === "audio") {
         const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
@@ -40,7 +32,7 @@ const getAudioPath = (context, beat, audioFile) => {
 const getAudioParam = (presentationStyle, beat) => {
     const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
     // Use speaker-specific provider if available, otherwise fall back to script-level provider
-    const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+    const provider = MulmoPresentationStyleMethods.getTTSProvider(presentationStyle, beat);
     const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
     return { voiceId, provider, speechOptions };
 };
@@ -61,7 +53,7 @@ const preprocessor = (namedInputs) => {
     studioBeat.audioFile = audioPath; // TODO
     const needsTTS = !beat.audio && audioPath !== undefined;
     return {
-        ttsAgent: provider_to_agent[provider],
+        ttsAgent: provider2TTSAgent[provider].agentName,
         text,
         voiceId,
         speechOptions,
@@ -91,11 +83,13 @@ const graph_tts = {
             agent: ":preprocessor.ttsAgent",
             inputs: {
                 text: ":preprocessor.text",
-                file: ":preprocessor.audioPath",
-                force: ":context.force",
-                mulmoContext: ":context", // for cache
-                index: ":__mapIndex", // for cache
-                sessionType: "audio", // for cache
+                cache: {
+                    force: [":context.force"],
+                    file: ":preprocessor.audioPath",
+                    index: ":__mapIndex",
+                    mulmoContext: ":context",
+                    sessionType: "audio",
+                },
                 params: {
                     voice: ":preprocessor.voiceId",
                     speed: ":preprocessor.speechOptions.speed",
@@ -184,8 +178,8 @@ export const audioFilePath = (context) => {
 const getConcurrency = (context) => {
     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
     const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
-        const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
-        return provider === "nijivoice" || provider === "elevenlabs";
+        const provider = (speaker.provider ?? context.presentationStyle.speechParams.provider);
+        return provider2TTSAgent[provider].hasLimitedConcurrency;
     });
     return hasLimitedConcurrencyProvider ? 1 : 8;
 };
@@ -239,7 +233,7 @@ export const audio = async (context, settings, callbacks) => {
         const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
         mkdir(outDirPath);
         mkdir(audioSegmentDirPath);
-        const config = settings2GraphAIConfig(settings);
+        const config = settings2GraphAIConfig(settings, process.env);
         const taskManager = new TaskManager(getConcurrency(context));
         const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
         graph.injectValue("context", context);

package/lib/actions/image_agents.d.ts ADDED Viewed

@@ -0,0 +1,145 @@
+import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension } from "../types/index.js";
+export declare const imagePreprocessAgent: (namedInputs: {
+    context: MulmoStudioContext;
+    beat: MulmoBeat;
+    index: number;
+    imageRefs: Record<string, string>;
+}) => Promise<{
+    imagePath: string;
+    htmlPrompt: string | undefined;
+    htmlPath: string;
+    htmlImageSystemPrompt: string[];
+} | {
+    imagePath: string | undefined;
+    referenceImageForMovie: string | undefined;
+    imageParams: {
+        provider: string;
+        style?: string | undefined;
+        model?: string | undefined;
+        moderation?: string | undefined;
+        images?: Record<string, {
+            type: "image";
+            source: {
+                url: string;
+                kind: "url";
+            } | {
+                kind: "base64";
+                data: string;
+            } | {
+                text: string;
+                kind: "text";
+            } | {
+                path: string;
+                kind: "path";
+            };
+        } | {
+            type: "imagePrompt";
+            prompt: string;
+        }> | undefined;
+    };
+    movieFile: string | undefined;
+    htmlPrompt?: undefined;
+    htmlPath?: undefined;
+    htmlImageSystemPrompt?: undefined;
+} | {
+    imagePath: string;
+    imageFromMovie: boolean;
+    movieParams: {
+        speed?: number | undefined;
+        model?: string | undefined;
+        fillOption?: {
+            style: "aspectFit" | "aspectFill";
+        } | undefined;
+        provider?: string | undefined;
+        transition?: {
+            type: "fade" | "slideout_left";
+            duration: number;
+        } | undefined;
+    };
+    imageParams: {
+        provider: string;
+        style?: string | undefined;
+        model?: string | undefined;
+        moderation?: string | undefined;
+        images?: Record<string, {
+            type: "image";
+            source: {
+                url: string;
+                kind: "url";
+            } | {
+                kind: "base64";
+                data: string;
+            } | {
+                text: string;
+                kind: "text";
+            } | {
+                path: string;
+                kind: "path";
+            };
+        } | {
+            type: "imagePrompt";
+            prompt: string;
+        }> | undefined;
+    };
+    movieFile: string | undefined;
+    htmlPrompt?: undefined;
+    htmlPath?: undefined;
+    htmlImageSystemPrompt?: undefined;
+} | {
+    imagePath: string;
+    referenceImageForMovie: string;
+    imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
+    prompt: string;
+    referenceImages: string[];
+    movieParams: {
+        speed?: number | undefined;
+        model?: string | undefined;
+        fillOption?: {
+            style: "aspectFit" | "aspectFill";
+        } | undefined;
+        provider?: string | undefined;
+        transition?: {
+            type: "fade" | "slideout_left";
+            duration: number;
+        } | undefined;
+    };
+    imageParams: {
+        provider: string;
+        style?: string | undefined;
+        model?: string | undefined;
+        moderation?: string | undefined;
+        images?: Record<string, {
+            type: "image";
+            source: {
+                url: string;
+                kind: "url";
+            } | {
+                kind: "base64";
+                data: string;
+            } | {
+                text: string;
+                kind: "text";
+            } | {
+                path: string;
+                kind: "path";
+            };
+        } | {
+            type: "imagePrompt";
+            prompt: string;
+        }> | undefined;
+    };
+    movieFile: string | undefined;
+    htmlPrompt?: undefined;
+    htmlPath?: undefined;
+    htmlImageSystemPrompt?: undefined;
+}>;
+export declare const imagePluginAgent: (namedInputs: {
+    context: MulmoStudioContext;
+    beat: MulmoBeat;
+    index: number;
+}) => Promise<void>;
+export declare const htmlImageGeneratorAgent: (namedInputs: {
+    file: string;
+    canvasSize: MulmoCanvasDimension;
+    htmlText: string;
+}) => Promise<void>;

package/lib/actions/image_agents.js ADDED Viewed

@@ -0,0 +1,59 @@
+import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods } from "../methods/index.js";
+import { getBeatPngImagePath, getBeatMoviePath } from "../utils/file.js";
+import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
+import { renderHTMLToImage } from "../utils/markdown.js";
+import { GraphAILogger } from "graphai";
+const htmlStyle = (context, beat) => {
+    return {
+        canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
+        textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
+    };
+};
+export const imagePreprocessAgent = async (namedInputs) => {
+    const { context, beat, index, imageRefs } = namedInputs;
+    const imagePath = getBeatPngImagePath(context, index);
+    if (beat.htmlPrompt) {
+        const htmlPrompt = MulmoBeatMethods.getHtmlPrompt(beat);
+        const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
+        return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
+    }
+    const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
+    const returnValue = {
+        imageParams: imageAgentInfo.imageParams,
+        movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
+    };
+    if (beat.image) {
+        const plugin = MulmoBeatMethods.getPlugin(beat);
+        const pluginPath = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
+        // undefined prompt indicates that image generation is not needed
+        return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
+    }
+    const movieParams = { ...context.presentationStyle.movieParams, ...beat.movieParams };
+    GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
+    if (beat.moviePrompt && !beat.imagePrompt) {
+        return { ...returnValue, imagePath, imageFromMovie: true, movieParams }; // no image prompt, only movie prompt
+    }
+    // referenceImages for "edit_image", openai agent.
+    const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
+    const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
+    return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieParams };
+};
+export const imagePluginAgent = async (namedInputs) => {
+    const { context, beat, index } = namedInputs;
+    const imagePath = getBeatPngImagePath(context, index);
+    const plugin = MulmoBeatMethods.getPlugin(beat);
+    try {
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
+        const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
+        await plugin.process(processorParams);
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
+    }
+    catch (error) {
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
+        throw error;
+    }
+};
+export const htmlImageGeneratorAgent = async (namedInputs) => {
+    const { file, canvasSize, htmlText } = namedInputs;
+    await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
+};

package/lib/actions/image_references.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import { MulmoStudioContext, MulmoImagePromptMedia } from "../types/index.js";
+export declare const generateReferenceImage: (inputs: {
+    context: MulmoStudioContext;
+    key: string;
+    index: number;
+    image: MulmoImagePromptMedia;
+    force?: boolean;
+}) => Promise<string>;
+export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;

package/lib/actions/image_references.js ADDED Viewed

@@ -0,0 +1,79 @@
+import fs from "fs";
+import { GraphAI } from "graphai";
+import { getReferenceImagePath } from "../utils/file.js";
+import { getExtention } from "../utils/utils.js";
+import { graphOption } from "./images.js";
+import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
+import { imageGoogleAgent, imageOpenaiAgent } from "../agents/index.js";
+// public api
+// Application may call this function directly to generate reference image.
+export const generateReferenceImage = async (inputs) => {
+    const { context, key, index, image, force } = inputs;
+    const imagePath = getReferenceImagePath(context, key, "png");
+    // generate image
+    const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
+    const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
+    const image_graph_data = {
+        version: 0.5,
+        nodes: {
+            imageGenerator: {
+                agent: imageAgentInfo.agent,
+                retry: 2,
+                inputs: {
+                    prompt,
+                    cache: {
+                        force: [context.force, force ?? false],
+                        file: imagePath,
+                        index,
+                        mulmoContext: context,
+                        sessionType: "imageReference",
+                    },
+                },
+                params: {
+                    model: imageAgentInfo.imageParams.model,
+                    canvasSize: context.presentationStyle.canvasSize,
+                },
+            },
+        },
+    };
+    const options = await graphOption(context);
+    const graph = new GraphAI(image_graph_data, { imageGoogleAgent, imageOpenaiAgent }, options);
+    await graph.run();
+    return imagePath;
+};
+const downLoadImage = async (context, key, url) => {
+    const response = await fetch(url);
+    if (!response.ok) {
+        throw new Error(`Failed to download image: ${url}`);
+    }
+    const buffer = Buffer.from(await response.arrayBuffer());
+    // Detect file extension from Content-Type header or URL
+    const extension = getExtention(response.headers.get("content-type"), url);
+    const imagePath = getReferenceImagePath(context, key, extension);
+    await fs.promises.writeFile(imagePath, buffer);
+    return imagePath;
+};
+export const getImageRefs = async (context) => {
+    const images = context.presentationStyle.imageParams?.images;
+    if (!images) {
+        return {};
+    }
+    const imageRefs = {};
+    await Promise.all(Object.keys(images)
+        .sort()
+        .map(async (key, index) => {
+        const image = images[key];
+        if (image.type === "imagePrompt") {
+            imageRefs[key] = await generateReferenceImage({ context, key, index, image, force: false });
+        }
+        else if (image.type === "image") {
+            if (image.source.kind === "path") {
+                imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
+            }
+            else if (image.source.kind === "url") {
+                imageRefs[key] = await downLoadImage(context, key, image.source.url);
+            }
+        }
+    }));
+    return imageRefs;
+};

package/lib/actions/images.d.ts CHANGED Viewed

@@ -1,112 +1,20 @@
-import type { CallbackFunction } from "graphai";
-import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
-export declare const imagePreprocessAgent: (namedInputs: {
-    context: MulmoStudioContext;
-    beat: MulmoBeat;
+import type { GraphOptions, CallbackFunction } from "graphai";
+import { MulmoStudioContext } from "../types/index.js";
+export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
+type ImageOptions = {
+    imageAgents: Record<string, unknown>;
+};
+export declare const images: (context: MulmoStudioContext, args?: {
+    settings?: Record<string, string>;
+    callbacks?: CallbackFunction[];
+    options?: ImageOptions;
+}) => Promise<MulmoStudioContext>;
+export declare const generateBeatImage: (inputs: {
     index: number;
-    imageRefs: Record<string, string>;
-}) => Promise<{
-    imageParams: {
-        provider: "openai" | "google";
-        style?: string | undefined;
-        model?: string | undefined;
-        moderation?: string | undefined;
-        images?: Record<string, {
-            type: "image";
-            source: {
-                url: string;
-                kind: "url";
-            } | {
-                kind: "base64";
-                data: string;
-            } | {
-                text: string;
-                kind: "text";
-            } | {
-                path: string;
-                kind: "path";
-            };
-        }> | undefined;
-    };
-    movieFile: string | undefined;
-    imagePath: string | undefined;
-    referenceImage: string | undefined;
-    htmlPrompt?: undefined;
-    htmlPath?: undefined;
-    htmlImageSystemPrompt?: undefined;
-} | {
-    imagePath: string;
-    htmlPrompt: string;
-    htmlPath: string;
-    htmlImageSystemPrompt: string[];
-} | {
-    imagePath: string;
-    images: string[];
-    imageFromMovie: boolean;
-    imageParams: {
-        provider: "openai" | "google";
-        style?: string | undefined;
-        model?: string | undefined;
-        moderation?: string | undefined;
-        images?: Record<string, {
-            type: "image";
-            source: {
-                url: string;
-                kind: "url";
-            } | {
-                kind: "base64";
-                data: string;
-            } | {
-                text: string;
-                kind: "text";
-            } | {
-                path: string;
-                kind: "path";
-            };
-        }> | undefined;
-    };
-    movieFile: string | undefined;
-    htmlPrompt?: undefined;
-    htmlPath?: undefined;
-    htmlImageSystemPrompt?: undefined;
-} | {
-    images: string[];
-    imageParams: {
-        provider: "openai" | "google";
-        style?: string | undefined;
-        model?: string | undefined;
-        moderation?: string | undefined;
-        images?: Record<string, {
-            type: "image";
-            source: {
-                url: string;
-                kind: "url";
-            } | {
-                kind: "base64";
-                data: string;
-            } | {
-                text: string;
-                kind: "text";
-            } | {
-                path: string;
-                kind: "path";
-            };
-        }> | undefined;
-    };
-    movieFile: string | undefined;
-    imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
-    imagePath: string;
-    referenceImage: string;
-    prompt: string;
-    htmlPrompt?: undefined;
-    htmlPath?: undefined;
-    htmlImageSystemPrompt?: undefined;
-}>;
-export declare const imagePluginAgent: (namedInputs: {
     context: MulmoStudioContext;
-    beat: MulmoBeat;
-    index: number;
+    settings?: Record<string, string>;
+    callbacks?: CallbackFunction[];
+    forceMovie?: boolean;
+    forceImage?: boolean;
 }) => Promise<void>;
-export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
-export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
-export declare const generateBeatImage: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
+export {};