npm - mulmocast - Versions diffs - 0.0.8 → 0.0.9 - Mend

mulmocast 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/assets/templates/akira_comic.json +28 -0
package/assets/templates/children_book.json +13 -0
package/assets/templates/comic_strips.json +14 -1
package/assets/templates/drslump_comic.json +28 -0
package/assets/templates/ghibli_comic.json +28 -0
package/assets/templates/ghost_comic.json +35 -0
package/assets/templates/onepiece_comic.json +28 -0
package/assets/templates/sensei_and_taro.json +21 -0
package/lib/actions/audio.js +2 -2
package/lib/actions/captions.js +2 -2
package/lib/actions/images.js +48 -6
package/lib/actions/movie.d.ts +1 -1
package/lib/actions/movie.js +13 -11
package/lib/actions/pdf.js +6 -4
package/lib/actions/translate.js +2 -2
package/lib/agents/image_openai_agent.d.ts +1 -0
package/lib/agents/image_openai_agent.js +15 -3
package/lib/cli/bin.js +7 -0
package/lib/cli/helpers.js +2 -1
package/lib/tools/create_mulmo_script_from_url.js +2 -2
package/lib/tools/create_mulmo_script_interactively.js +2 -2
package/lib/tools/story_to_script.js +2 -2
package/lib/types/schema.d.ts +1738 -228
package/lib/types/schema.js +8 -2
package/lib/utils/file.js +20 -9
package/lib/utils/pdf.d.ts +1 -0
package/lib/utils/pdf.js +5 -3
package/lib/utils/preprocess.d.ts +50 -16
package/package.json +9 -9
package/scripts/templates/children_book.json +0 -7
package/scripts/templates/image_prompts_template.json +41 -0
package/scripts/templates/sensei_and_taro.json +0 -11
package/scripts/templates/text_only_template.json +35 -0
package/assets/templates/ghibli_strips.json +0 -6
package/scripts/templates/comic_strips.json +0 -30
package/scripts/templates/ghibli_strips.json +0 -30

package/assets/templates/akira_comic.json ADDED Viewed

@@ -0,0 +1,28 @@
+{
+  "title": "AKIRA Style Comic Strips",
+  "description": "Template for AKIRA-style comic.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>AKIRA aesthetic.</style>",
+      "images": {
+        "girl": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/akira_presenter.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/children_book.json CHANGED Viewed

@@ -2,5 +2,18 @@
   "title": "Children Book",
   "description": "Template for children book.",
   "systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must haven an image prompt appropriate for the text.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
+    }
+  },
   "scriptName": "children_book.json"
 }

package/assets/templates/comic_strips.json CHANGED Viewed

@@ -2,5 +2,18 @@
   "title": "American Comic Strips",
   "description": "Template for Dilbert-style comic strips.",
   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
-  "scriptName": "comic_strips.json"
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
+    }
+  },
+  "scriptName": "text_only_template.json"
 }

package/assets/templates/drslump_comic.json ADDED Viewed

@@ -0,0 +1,28 @@
+{
+  "title": "Dr. Slump Style Comic Strips",
+  "description": "Template for Dr. Slump-style comic.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
+      "images": {
+        "girl": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/slump_presenter.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/ghibli_comic.json ADDED Viewed

@@ -0,0 +1,28 @@
+{
+  "title": "Ghibli Style Comic Strips",
+  "description": "Template for Ghibli-style comic.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>Ghibli style</style>",
+      "images": {
+        "presenter": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/ghost_comic.json ADDED Viewed

@@ -0,0 +1,35 @@
+{
+  "title": "Ghost in the Shell Style Comic Strips",
+  "description": "Template for Ghost in the Shell-style comic.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>Ghost in the shell aesthetic.</style>",
+      "images": {
+        "presenter": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"
+          }
+        },
+        "optimus": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/onepiece_comic.json ADDED Viewed

@@ -0,0 +1,28 @@
+{
+  "title": "One Piece Style Comic Strips",
+  "description": "Template for One Piece-style comic.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>One Piece aesthetic.</style>",
+      "images": {
+        "presenter": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/onepiece_presenter.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/sensei_and_taro.json CHANGED Viewed

@@ -2,5 +2,26 @@
   "title": "Student and Teacher",
   "description": "Interactive discussion between a student and teacher",
   "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1536,
+      "height": 1024
+    },
+    "imageParams": {
+      "style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
+    },
+    "speechParams": {
+      "provider": "nijivoice",
+      "speakers": {
+        "Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62" },
+        "Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
+        "Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
+      }
+    }
+  },
   "scriptName": "sensei_and_taro.json"
 }

package/lib/actions/audio.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import "dotenv/config";
 import { GraphAI } from "graphai";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
 import addBGMAgent from "../agents/add_bgm_agent.js";
 import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
 import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
 import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
 // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
 const provider_to_agent = {

package/lib/actions/captions.js CHANGED Viewed

@@ -1,9 +1,9 @@
 import { GraphAI, GraphAILogger } from "graphai";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import { getHTMLFile } from "../utils/file.js";
 import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
 import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 const graph_data = {
     version: 0.5,
     nodes: {

package/lib/actions/images.js CHANGED Viewed

@@ -1,15 +1,16 @@
 import dotenv from "dotenv";
+import fs from "fs";
 import { GraphAI, GraphAILogger } from "graphai";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import imageGoogleAgent from "../agents/image_google_agent.js";
 import imageOpenaiAgent from "../agents/image_openai_agent.js";
-import { MulmoScriptMethods } from "../methods/index.js";
+import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { imagePlugins } from "../utils/image_plugins/index.js";
 import { imagePrompt } from "../utils/prompt.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 dotenv.config();
 // const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
@@ -21,7 +22,7 @@ const htmlStyle = (script, beat) => {
     };
 };
 const imagePreprocessAgent = async (namedInputs) => {
-    const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
+    const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
     const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
     const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
     const returnValue = {
@@ -44,7 +45,12 @@ const imagePreprocessAgent = async (namedInputs) => {
         }
     }
     const prompt = imagePrompt(beat, imageParams.style);
-    return { path: imagePath, prompt, ...returnValue };
+    const images = (() => {
+        const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
+        const sources = imageNames.map((name) => imageRefs[name]);
+        return sources.filter((source) => source !== undefined);
+    })();
+    return { path: imagePath, prompt, ...returnValue, images };
 };
 const graph_data = {
     version: 0.5,
@@ -54,9 +60,16 @@ const graph_data = {
         imageDirPath: {},
         imageAgentInfo: {},
         outputStudioFilePath: {},
+        imageRefs: {},
         map: {
             agent: "mapAgent",
-            inputs: { rows: ":context.studio.script.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
+            inputs: {
+                rows: ":context.studio.script.beats",
+                context: ":context",
+                imageAgentInfo: ":imageAgentInfo",
+                imageDirPath: ":imageDirPath",
+                imageRefs: ":imageRefs",
+            },
             isResult: true,
             params: {
                 rowKey: "beat",
@@ -73,6 +86,7 @@ const graph_data = {
                             suffix: "p",
                             imageDirPath: ":imageDirPath",
                             imageAgentInfo: ":imageAgentInfo",
+                            imageRefs: ":imageRefs",
                         },
                     },
                     imageGenerator: {
@@ -92,6 +106,7 @@ const graph_data = {
                                 size: ":preprocessor.imageParams.size",
                                 moderation: ":preprocessor.imageParams.moderation",
                                 aspectRatio: ":preprocessor.aspectRatio",
+                                images: ":preprocessor.images",
                             },
                         },
                         defaultValue: {},
@@ -170,12 +185,39 @@ const generateImages = async (context) => {
             },
         };
     }
+    if (imageAgentInfo.provider === "openai") {
+        // NOTE: Here are the rate limits of OpenAI's text2image API (1 token = 32x32 patch).
+        // dall-e-3: 7,500 RPM, 15 images per minute (4 images for max resolution)
+        // gpt-image-1: 3,000,000 TPM, 150 images per minute
+        graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
+    }
+    const imageRefs = {};
+    const images = studio.script.imageParams?.images;
+    if (images) {
+        await Promise.all(Object.keys(images).map(async (key) => {
+            const image = images[key];
+            if (image.source.kind === "path") {
+                imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
+            }
+            else if (image.source.kind === "url") {
+                const response = await fetch(image.source.url);
+                if (!response.ok) {
+                    throw new Error(`Failed to download image: ${image.source.url}`);
+                }
+                const buffer = Buffer.from(await response.arrayBuffer());
+                const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
+                await fs.promises.writeFile(imagePath, buffer);
+                imageRefs[key] = imagePath;
+            }
+        }));
+    }
     GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
     const injections = {
         context,
         imageAgentInfo,
         outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
         imageDirPath,
+        imageRefs,
     };
     const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
     Object.keys(injections).forEach((key) => {

package/lib/actions/movie.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType
     videoId: string;
     videoPart: string;
 };
-export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
+export declare const getAudioPart: (inputIndex: number, duration: number, delay: number, mixAudio: number) => {
     audioId: string;
     audioPart: string;
 };

package/lib/actions/movie.js CHANGED Viewed

@@ -26,13 +26,14 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
         videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
     };
 };
-export const getAudioPart = (inputIndex, duration, delay) => {
+export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
     const audioId = `a${inputIndex}`;
     return {
         audioId,
         audioPart: `[${inputIndex}:a]` +
             `atrim=duration=${duration},` + // Trim to beat duration
             `adelay=${delay * 1000}|${delay * 1000},` +
+            `volume=${mixAudio},` + // Scale this input's audio by the mixAudio factor
             `aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
             `[${audioId}]`,
     };
@@ -68,12 +69,13 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
     // Add each image input
     const filterComplexVideoIds = [];
     const filterComplexAudioIds = [];
-    studio.beats.reduce((timestamp, beat, index) => {
-        if (!beat.imageFile || !beat.duration) {
-            throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
+    studio.beats.reduce((timestamp, studioBeat, index) => {
+        const beat = studio.script.beats[index];
+        if (!studioBeat.imageFile || !studioBeat.duration) {
+            throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
         }
-        const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
-        const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
+        const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.imageFile);
+        const mediaType = MulmoScriptMethods.getImageType(studio.script, beat);
         const extraPadding = (() => {
             // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
             if (index === 0) {
@@ -84,11 +86,11 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
             }
             return 0;
         })();
-        const duration = beat.duration + extraPadding;
+        const duration = studioBeat.duration + extraPadding;
         const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
         ffmpegContext.filterComplex.push(videoPart);
-        if (caption && beat.captionFile) {
-            const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
+        if (caption && studioBeat.captionFile) {
+            const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
             const compositeVideoId = `c${index}`;
             ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
             filterComplexVideoIds.push(compositeVideoId);
@@ -96,8 +98,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
         else {
             filterComplexVideoIds.push(videoId);
         }
-        if (mediaType === "movie") {
-            const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
+        if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
+            const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
             filterComplexAudioIds.push(audioId);
             ffmpegContext.filterComplex.push(audioPart);
         }

package/lib/actions/pdf.js CHANGED Viewed

@@ -102,19 +102,20 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
             const pos = (() => {
                 if (isLandscapeImage) {
                     const cellHeight = pageHeight / imagesPerPage - offset;
-                    const { drawWidth, drawHeight } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
-                    const x = offset;
+                    const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
+                    const x = offset + (containerWidth - drawWidth) / 2;
                     const y = pageHeight - (i + 1) * cellHeight + (cellHeight - drawHeight) * handoutImageRatio;
                     return {
                         x,
                         y,
                         width: drawWidth,
                         height: drawHeight,
+                        containerWidth,
                     };
                 }
                 else {
                     const cellWidth = pageWidth / imagesPerPage;
-                    const { drawWidth, drawHeight } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
+                    const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
                     const x = pageWidth - (imagesPerPage - i) * cellWidth + (cellWidth - drawWidth) * handoutImageRatio;
                     const y = pageHeight - drawHeight - offset;
                     return {
@@ -122,6 +123,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
                         y,
                         width: drawWidth,
                         height: drawHeight,
+                        containerWidth,
                     };
                 }
             })();
@@ -136,7 +138,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
                 for (const [index, line] of lines.entries()) {
                     page.drawText(line, {
                         ...pos,
-                        x: pos.x + pos.width + textMargin,
+                        x: offset + pos.containerWidth + textMargin,
                         y: pos.y + pos.height - fontSize - (fontSize + 2) * index,
                         size: fontSize,
                         font,

package/lib/actions/translate.js CHANGED Viewed

@@ -1,13 +1,13 @@
 import "dotenv/config";
 import { GraphAI, assert } from "graphai";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
 import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
 import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
 import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 const translateGraph = {
     version: 0.5,
     nodes: {

package/lib/agents/image_openai_agent.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ export declare const imageOpenaiAgent: AgentFunction<{
     model: string;
     size: OpenAIImageSize | null | undefined;
     moderation: OpenAIModeration | null | undefined;
+    images: string[] | null | undefined;
 }, {
     buffer: Buffer;
 }, {

package/lib/agents/image_openai_agent.js CHANGED Viewed

@@ -1,8 +1,9 @@
-import OpenAI from "openai";
+import fs from "fs";
+import OpenAI, { toFile } from "openai";
 // https://platform.openai.com/docs/guides/image-generation
 export const imageOpenaiAgent = async ({ namedInputs, params }) => {
     const { prompt } = namedInputs;
-    const { apiKey, model, size, moderation } = params;
+    const { apiKey, model, size, moderation, images } = params;
     const openai = new OpenAI({ apiKey });
     const imageOptions = {
         model: model ?? "dall-e-3",
@@ -13,7 +14,18 @@ export const imageOpenaiAgent = async ({ namedInputs, params }) => {
     if (model === "gpt-image-1") {
         imageOptions.moderation = moderation || "auto";
     }
-    const response = await openai.images.generate(imageOptions);
+    const response = await (async () => {
+        const targetSize = imageOptions.size;
+        if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
+            const imagelist = await Promise.all((images ?? []).map(async (file) => await toFile(fs.createReadStream(file), null, {
+                type: "image/png", // TODO: Support JPEG as well
+            })));
+            return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
+        }
+        else {
+            return await openai.images.generate(imageOptions);
+        }
+    })();
     if (!response.data) {
         throw new Error(`response.data is undefined: ${response}`);
     }

package/lib/cli/bin.js CHANGED Viewed

@@ -2,6 +2,9 @@
 import "dotenv/config";
 import yargs from "yargs/yargs";
 import { hideBin } from "yargs/helpers";
+import { readFileSync } from "fs";
+import { fileURLToPath } from "url";
+import { dirname, join } from "path";
 import * as translateCmd from "./commands/translate/index.js";
 import * as audioCmd from "./commands/audio/index.js";
 import * as imagesCmd from "./commands/image/index.js";
@@ -9,9 +12,13 @@ import * as movieCmd from "./commands/movie/index.js";
 import * as pdfCmd from "./commands/pdf/index.js";
 import * as toolCmd from "./commands/tool/index.js";
 import { GraphAILogger } from "graphai";
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const packageJson = JSON.parse(readFileSync(join(__dirname, "../../package.json"), "utf8"));
 export const main = async () => {
     const cli = yargs(hideBin(process.argv))
         .scriptName("mulmo")
+        .version(packageJson.version)
         .usage("$0 <command> [options]")
         .option("v", {
         alias: "verbose",

package/lib/cli/helpers.js CHANGED Viewed

@@ -2,7 +2,7 @@ import { GraphAILogger } from "graphai";
 import fs from "fs";
 import path from "path";
 import clipboardy from "clipboardy";
-import { getBaseDirPath, getFullPath, readMulmoScriptFile, fetchMulmoScriptFile, getOutputStudioFilePath, resolveDirPath } from "../utils/file.js";
+import { getBaseDirPath, getFullPath, readMulmoScriptFile, fetchMulmoScriptFile, getOutputStudioFilePath, resolveDirPath, mkdir } from "../utils/file.js";
 import { isHttp } from "../utils/utils.js";
 import { createOrUpdateStudioData } from "../utils/preprocess.js";
 import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
@@ -33,6 +33,7 @@ export const getFileObject = (args) => {
             const fileName = `script_${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
             const clipboardText = clipboardy.readSync();
             const fileOrUrl = resolveDirPath(outDirPath, `${fileName}.json`);
+            mkdir(outDirPath);
             fs.writeFileSync(fileOrUrl, clipboardText, "utf8");
             return { fileOrUrl, fileName };
         }

package/lib/tools/create_mulmo_script_from_url.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { openAIAgent } from "@graphai/openai_agent";
 import { anthropicAgent } from "@graphai/anthropic_agent";
 import { geminiAgent } from "@graphai/gemini_agent";
 import { groqAgent } from "@graphai/groq_agent";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { browserlessAgent } from "@graphai/browserless_agent";
 import validateSchemaAgent from "../agents/validate_schema_agent.js";
@@ -14,7 +14,7 @@ import { mulmoScriptSchema, urlsSchema } from "../types/schema.js";
 import { cliLoadingPlugin } from "../utils/plugins.js";
 import { graphDataScriptFromUrlPrompt } from "../utils/prompt.js";
 import { llmPair } from "../utils/utils.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 const graphData = {
     version: 0.5,
     // Execute sequentially because the free version of browserless API doesn't support concurrent execution.

package/lib/tools/create_mulmo_script_interactively.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { openAIAgent } from "@graphai/openai_agent";
 import { anthropicAgent } from "@graphai/anthropic_agent";
 import { geminiAgent } from "@graphai/gemini_agent";
 import { groqAgent } from "@graphai/groq_agent";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { readTemplatePrompt, mkdir } from "../utils/file.js";
 import { browserlessCacheGenerator } from "../utils/filters.js";
@@ -16,7 +16,7 @@ import validateSchemaAgent from "../agents/validate_schema_agent.js";
 import { llmPair } from "../utils/utils.js";
 import { interactiveClarificationPrompt, prefixPrompt } from "../utils/prompt.js";
 // import { cliLoadingPlugin } from "../utils/plugins.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 const agentHeader = "\x1b[34m● \x1b[0m\x1b[1mAgent\x1b[0m:\x1b[0m";
 const graphDataForScraping = {
     version: 0.5,

package/lib/tools/story_to_script.js CHANGED Viewed

@@ -5,14 +5,14 @@ import { openAIAgent } from "@graphai/openai_agent";
 import { anthropicAgent } from "@graphai/anthropic_agent";
 import { geminiAgent } from "@graphai/gemini_agent";
 import { groqAgent } from "@graphai/groq_agent";
-import * as agents from "@graphai/vanilla";
+import vanillaAgents from "@graphai/vanilla";
 import { graphDataScriptGeneratePrompt, sceneToBeatsPrompt, storyToScriptInfoPrompt, storyToScriptPrompt } from "../utils/prompt.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import validateSchemaAgent from "../agents/validate_schema_agent.js";
 import { llmPair } from "../utils/utils.js";
 import { storyToScriptGenerateMode } from "../utils/const.js";
 import { cliLoadingPlugin } from "../utils/plugins.js";
-const { default: __, ...vanillaAgents } = agents;
+// const { default: __, ...vanillaAgents } = agents;
 const createValidatedScriptGraphData = ({ systemPrompt, prompt, schema, llmAgent, llmModel, maxTokens, }) => {
     return {
         loop: {