mulmocast 1.2.65 → 1.2.67

This diff compares the published contents of two versions of this package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,39 +1,30 @@
 import path from "path";
 import fs from "fs";
-import { GraphAILogger } from "graphai";
 import { listLocalizedAudioPaths } from "./audio.js";
-import { imagePreprocessAgent } from "./image_agents.js";
 import { mkdir } from "../utils/file.js";
 import { ZipBuilder } from "../utils/zip.js";
 import { bundleTargetLang } from "../utils/const.js";
-const beatImage = (context) => {
-  return async (beat, index) => {
-    try {
-      const res = await imagePreprocessAgent({ context, beat, index, imageRefs: {} });
-      if ("htmlPrompt" in res) {
-        return { htmlImageSource: res.htmlImageFile, imageSource: res.imagePath };
-      }
-      const { imagePath, movieFile, lipSyncFile } = res;
-      return { imageSource: imagePath, videoSource: movieFile, videoWithAudioSource: lipSyncFile };
-    }
-    catch (e) {
-      GraphAILogger.log(e);
-      return {};
-    }
-  };
-};
-// TODO reference
+import { createSilentAudio } from "../utils/ffmpeg_utils.js";
 const viewJsonFileName = "mulmo_view.json";
 const zipFileName = "mulmo.zip";
+const imageSourceMappings = [
+  ["imageFile", "imageSource"],
+  ["movieFile", "videoSource"],
+  ["soundEffectFile", "soundEffectSource"],
+  ["lipSyncFile", "videoWithAudioSource"],
+  ["htmlImageFile", "htmlImageSource"],
+];
 export const mulmoViewerBundle = async (context) => {
   const isZip = true;
   const dir = path.resolve(context.fileDirs.fileName);
   mkdir(dir);
   const zipper = new ZipBuilder(path.resolve(dir, zipFileName));
+  // text
   const resultJson = [];
   context.studio.script.beats.forEach((beat) => {
     resultJson.push({ text: beat.text, duration: beat.duration, audioSources: {}, multiLinguals: {} });
   });
+  // audio
   for (const lang of bundleTargetLang) {
     const audios = listLocalizedAudioPaths({ ...context, lang });
     audios.forEach((audio, index) => {
@@ -49,14 +40,13 @@ export const mulmoViewerBundle = async (context) => {
       }
     });
   }
-  const images = await Promise.all(context.studio.script.beats.map(beatImage(context)));
-  images.forEach((image, index) => {
+  // image, movie
+  context.studio.beats.forEach((image, index) => {
     const data = resultJson[index];
-    const keys = ["htmlImageSource", "imageSource", "videoSource", "videoWithAudioSource"];
-    keys.forEach((key) => {
+    imageSourceMappings.forEach(([key, source]) => {
       const value = image[key];
-      if (value) {
-        data[key] = path.basename(value);
+      if (typeof value === "string") {
+        data[source] = path.basename(value);
         if (fs.existsSync(value)) {
           fs.copyFileSync(value, path.resolve(dir, path.basename(value)));
           zipper.addFile(value);
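
Note: the bundler no longer re-runs `imagePreprocessAgent` per beat; it reads the media files already recorded on `context.studio.beats` and translates them through the `imageSourceMappings` table. A minimal sketch of that table-driven copy, with assumed (simplified) types — the real studio beats carry more fields than listed here:

```ts
// Sketch of the mapping-table pattern above; types are assumptions for illustration.
type StudioBeat = Partial<Record<"imageFile" | "movieFile" | "soundEffectFile" | "lipSyncFile" | "htmlImageFile", string>>;
type ViewerSources = Record<string, string>;

const imageSourceMappings = [
  ["imageFile", "imageSource"],
  ["movieFile", "videoSource"],
  ["soundEffectFile", "soundEffectSource"],
  ["lipSyncFile", "videoWithAudioSource"],
  ["htmlImageFile", "htmlImageSource"],
] as const;

const toViewerSources = (beat: StudioBeat): ViewerSources => {
  const sources: ViewerSources = {};
  for (const [key, source] of imageSourceMappings) {
    const value = beat[key];
    // Only copy fields that were actually produced for this beat.
    if (typeof value === "string") {
      sources[source] = value; // the bundler additionally strips directories via path.basename
    }
  }
  return sources;
};
```
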
@@ -64,6 +54,23 @@ export const mulmoViewerBundle = async (context) => {
       }
     });
   });
+  // silent
+  await Promise.all(context.studio.script.beats.map(async (__, index) => {
+    const data = resultJson[index];
+    if (data.audioSources &&
+      Object.keys(data.audioSources).length === 0 &&
+      data.videoSource === undefined &&
+      data.videoWithAudioSource === undefined &&
+      data.duration) {
+      const file = `silent_${index}.mp3`;
+      const audioFile = path.resolve(dir, file);
+      await createSilentAudio(audioFile, data.duration);
+      zipper.addFile(audioFile);
+      data.audioSources.ja = file;
+      data.audioSources.en = file;
+    }
+  }));
+  // multiLinguals
 context.multiLingual.forEach((beat, index) => {
   bundleTargetLang.forEach((lang) => {
     if (resultJson[index] && resultJson[index].multiLinguals) {
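
Note: the new `// silent` pass fills beats that have a duration but no audio or video source with a generated silent MP3, so viewer playback timing stays consistent. The trigger condition, restated as a plain predicate (a sketch; field names follow the `resultJson` entries assembled above):

```ts
// Sketch: a beat needs a generated silent track when it has a duration
// but no audio source and no video source of either kind.
type ViewerEntry = {
  duration?: number;
  audioSources: Record<string, string>;
  videoSource?: string;
  videoWithAudioSource?: string;
};

const needsSilentAudio = (entry: ViewerEntry): boolean =>
  Object.keys(entry.audioSources).length === 0 &&
  entry.videoSource === undefined &&
  entry.videoWithAudioSource === undefined &&
  Boolean(entry.duration);
```
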
@@ -1,3 +1,4 @@
+import { GraphAILogger } from "graphai";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods, MulmoMediaSourceMethods } from "../methods/index.js";
 import { getBeatPngImagePath, getBeatMoviePaths, getAudioFilePath } from "../utils/file.js";
 import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
@@ -28,14 +29,17 @@ export const imagePreprocessAgent = async (namedInputs) => {
     beatDuration: beat.duration ?? studioBeat?.duration,
   };
   const isMovie = Boolean(beat.moviePrompt || beat?.image?.type === "movie");
-  if (isMovie) {
-    if (beat.soundEffectPrompt) {
+  if (beat.soundEffectPrompt) {
+    if (isMovie) {
       returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
       returnValue.soundEffectModel =
         beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
       returnValue.soundEffectFile = moviePaths.soundEffectFile;
       returnValue.soundEffectPrompt = beat.soundEffectPrompt;
     }
+    else {
+      GraphAILogger.warn(`soundEffectPrompt is set, but there is no video. beat: ${index}`);
+    }
   }
   if (beat.enableLipSync) {
     const lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
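
Note: swapping the nesting changes behavior in exactly one case — a `soundEffectPrompt` on a non-movie beat used to be silently ignored and now produces a warning. A condensed restatement of the new control flow (a sketch, not the module's actual signature):

```ts
// Sketch of the inverted nesting: a sound effect still requires a movie beat,
// but a dangling soundEffectPrompt is now reported instead of dropped.
const handleSoundEffect = (isMovie: boolean, soundEffectPrompt: string | undefined, index: number): void => {
  if (!soundEffectPrompt) {
    return; // nothing requested
  }
  if (isMovie) {
    // populate returnValue.soundEffect* as in the diff above
  } else {
    console.warn(`soundEffectPrompt is set, but there is no video. beat: ${index}`);
  }
};
```
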
@@ -354,11 +354,33 @@ export const images_graph_data = {
       studio.beats.forEach((studioBeat, index) => {
         const beat = studio.script.beats[index];
         if (beat.image?.type === "beat") {
-          if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
-            studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+          // reference Beat by plugin
+          const referenceBeat = (() => {
+            if (beat.image.id) {
+              if (beatIndexMap[beat.image.id] !== undefined) {
+                return studio.beats[beatIndexMap[beat.image.id]];
+              }
+              else {
+                GraphAILogger.info(`reference beat not exist: id=${beat.image.id}`);
+              }
+            }
+            else if (index > 0) {
+              return studio.beats[index - 1];
+            }
+          })();
+          if (referenceBeat === undefined) {
+            // error?
+            GraphAILogger.info(`reference beat not exist: index=${index}`);
           }
-          else if (index > 0) {
-            studioBeat.imageFile = studio.beats[index - 1].imageFile;
+          else {
+            studioBeat.imageFile = referenceBeat.imageFile;
+            studioBeat.movieFile = referenceBeat.movieFile;
+            studioBeat.soundEffectFile = referenceBeat.soundEffectFile;
+            studioBeat.lipSyncFile = referenceBeat.lipSyncFile;
+            studioBeat.hasMovieAudio = referenceBeat.hasMovieAudio;
+            studioBeat.htmlImageFile = referenceBeat.htmlImageFile;
+            studioBeat.markdown = referenceBeat.markdown;
+            studioBeat.html = referenceBeat.html;
           }
         }
       });
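
Note: beats with `image.type === "beat"` now inherit every derived artifact from the referenced beat — movie, sound effect, lip sync, HTML image, markdown, html — not just `imageFile`. Resolution order: an explicit `beat.image.id` wins; otherwise the previous beat is used; a missing target is logged rather than thrown. A compact sketch of the field copy (the field list is taken from the diff; beat shapes are assumed):

```ts
// Sketch: the reference beat donates all derived artifacts, not only the image.
const derivedFields = [
  "imageFile",
  "movieFile",
  "soundEffectFile",
  "lipSyncFile",
  "hasMovieAudio",
  "htmlImageFile",
  "markdown",
  "html",
] as const;

const copyDerivedFields = (target: Record<string, unknown>, reference: Record<string, unknown>): void => {
  for (const field of derivedFields) {
    target[field] = reference[field];
  }
};
```
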
@@ -1,5 +1,6 @@
 import type { AgentFunction, AgentFunctionInfo } from "graphai";
 import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GenAIImageAgentConfig } from "../types/agent.js";
+export declare const ratio2BlankPath: (aspectRatio: string) => string;
 export declare const imageGenAIAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GenAIImageAgentConfig>;
 declare const imageGenAIAgentInfo: AgentFunctionInfo;
 export default imageGenAIAgentInfo;
@@ -11,9 +11,53 @@ const getAspectRatio = (canvasSize) => {
   else if (canvasSize.width < canvasSize.height) {
     return "9:16";
   }
-  else {
-    return "1:1";
+  return "1:1";
+};
+export const ratio2BlankPath = (aspectRatio) => {
+  if (aspectRatio === "9:16") {
+    return blankVerticalImagePath();
+  }
+  else if (aspectRatio === "1:1") {
+    return blankSquareImagePath();
+  }
+  return blankImagePath();
+};
+const getGeminiContents = (prompt, aspectRatio, referenceImages) => {
+  const contents = [{ text: prompt }];
+  const images = [...(referenceImages ?? [])];
+  // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
+  images.push(ratio2BlankPath(aspectRatio));
+  images.forEach((imagePath) => {
+    const imageData = fs.readFileSync(imagePath);
+    const base64Image = imageData.toString("base64");
+    contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
+  });
+  return contents;
+};
+const geminiFlashResult = (response) => {
+  if (!response.candidates?.[0]?.content?.parts) {
+    throw new Error("ERROR: generateContent returned no candidates", {
+      cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+    });
+  }
+  for (const part of response.candidates[0].content.parts) {
+    if (part.text) {
+      GraphAILogger.info("Gemini image generation response:", part.text);
+    }
+    else if (part.inlineData) {
+      const imageData = part.inlineData.data;
+      if (!imageData) {
+        throw new Error("ERROR: generateContent returned no image data", {
+          cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+        });
+      }
+      const buffer = Buffer.from(imageData, "base64");
+      return { buffer };
+    }
   }
+  throw new Error("ERROR: generateContent returned no image data", {
+    cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+  });
 };
 export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
   const { prompt, referenceImages } = namedInputs;
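
Note: the Gemini branch was factored into three helpers — `ratio2BlankPath` (now exported) picks a blank PNG matching the requested aspect ratio, `getGeminiContents` appends it to the reference images as a hint (this Gemini model takes no explicit aspect-ratio parameter), and `geminiFlashResult` extracts the first inline image from the response. A self-contained sketch of the hint trick; the asset paths below are hypothetical stand-ins for the module's blank-image helpers:

```ts
import fs from "fs";

// Sketch: since the model has no aspect-ratio parameter, a blank PNG of the
// desired shape is appended as one more image part. Paths are assumptions.
const ratio2BlankPath = (aspectRatio: string): string => {
  if (aspectRatio === "9:16") return "./assets/blank_vertical.png"; // hypothetical path
  if (aspectRatio === "1:1") return "./assets/blank_square.png"; // hypothetical path
  return "./assets/blank.png"; // hypothetical path
};

const toInlinePart = (imagePath: string) => ({
  inlineData: { mimeType: "image/png", data: fs.readFileSync(imagePath).toString("base64") },
});

const buildContents = (prompt: string, aspectRatio: string, referenceImages: string[] = []) => [
  { text: prompt },
  ...referenceImages.map(toInlinePart),
  toInlinePart(ratio2BlankPath(aspectRatio)), // the hint, not a guarantee
];
```
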
@@ -28,47 +72,9 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
   try {
     const ai = new GoogleGenAI({ apiKey });
     if (model === "gemini-2.5-flash-image-preview") {
-      const contents = [{ text: prompt }];
-      const images = [...(referenceImages ?? [])];
-      // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
-      if (aspectRatio === "9:16") {
-        images.push(blankVerticalImagePath());
-      }
-      else if (aspectRatio === "1:1") {
-        images.push(blankSquareImagePath());
-      }
-      else {
-        images.push(blankImagePath());
-      }
-      images.forEach((imagePath) => {
-        const imageData = fs.readFileSync(imagePath);
-        const base64Image = imageData.toString("base64");
-        contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
-      });
+      const contents = getGeminiContents(prompt, aspectRatio, referenceImages);
       const response = await ai.models.generateContent({ model, contents });
-      if (!response.candidates?.[0]?.content?.parts) {
-        throw new Error("ERROR: generateContent returned no candidates", {
-          cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-        });
-      }
-      for (const part of response.candidates[0].content.parts) {
-        if (part.text) {
-          GraphAILogger.info("Gemini image generation response:", part.text);
-        }
-        else if (part.inlineData) {
-          const imageData = part.inlineData.data;
-          if (!imageData) {
-            throw new Error("ERROR: generateContent returned no image data", {
-              cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-            });
-          }
-          const buffer = Buffer.from(imageData, "base64");
-          return { buffer };
-        }
-      }
-      throw new Error("ERROR: generateContent returned no image data", {
-        cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-      });
+      return geminiFlashResult(response);
     }
     else {
       const response = await ai.models.generateImages({
@@ -11,6 +11,7 @@ const waitStable = async (page, ms = 1200, step = 200) => {
   let last = -1;
   let stable = 0;
   while (stable < ms) {
+    // eslint-disable-next-line no-undef
    const len = await page.evaluate(() => document.body?.innerText?.length || 0);
    stable = len === last ? stable + step : 0;
    last = len;
@@ -38,6 +39,7 @@ const fetchArticle = async (url) => {
   let finalText = text;
   if (finalText.length < 100) {
     const raw = await page.evaluate(() => {
+      // eslint-disable-next-line no-undef
       const el = document.querySelector("article, main, [role=main], .article, .post") || document.body;
       return el?.textContent || "";
     });
@@ -17,3 +17,4 @@ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
 }>;
 export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
 export declare const trimMusic: (inputFile: string, startTime: number, duration: number) => Promise<Buffer>;
+export declare const createSilentAudio: (filePath: string, durationSec: number) => Promise<void>;
@@ -132,3 +132,15 @@ export const trimMusic = (inputFile, startTime, duration) => {
     });
   });
 };
+export const createSilentAudio = (filePath, durationSec) => {
+  const filter = `anullsrc=r=44100:cl=stereo,atrim=duration=${durationSec},aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[a]`;
+  return new Promise((resolve, reject) => {
+    ffmpeg()
+      .complexFilter([filter])
+      .outputOptions(["-map", "[a]"])
+      .output(filePath)
+      .on("end", () => resolve())
+      .on("error", (err) => reject(err))
+      .run();
+  });
+};
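
Note: `createSilentAudio` builds an MP3 of silence with fluent-ffmpeg — `anullsrc` generates a silent 44.1 kHz stereo source, `atrim` cuts it to the requested duration, and the labeled stream `[a]` is mapped to the output file. A hedged usage sketch; the deep import path is an assumption, since the package's public entry point may not re-export this internal module:

```ts
import path from "path";
import { createSilentAudio } from "mulmocast/lib/utils/ffmpeg_utils.js"; // hypothetical import path

// Generate 2.5 seconds of silence for a beat that has a duration but no audio.
const target = path.resolve("output", "silent_0.mp3");
await createSilentAudio(target, 2.5);
```
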
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "mulmocast",
-  "version": "1.2.65",
+  "version": "1.2.67",
   "description": "",
   "type": "module",
   "main": "lib/index.node.js",
@@ -92,7 +92,7 @@
     "dotenv": "^17.2.3",
     "fluent-ffmpeg": "^2.1.3",
     "graphai": "^2.0.16",
-    "jsdom": "^27.0.0",
+    "jsdom": "^27.0.1",
     "marked": "^16.4.1",
     "mulmocast-vision": "^1.0.4",
     "ora": "^9.0.0",
@@ -116,7 +116,7 @@
     "prettier": "^3.6.2",
     "tsx": "^4.20.6",
     "typescript": "^5.9.3",
-    "typescript-eslint": "^8.46.1"
+    "typescript-eslint": "^8.46.2"
   },
   "engines": {
     "node": ">=20.0.0"
@@ -49,7 +49,7 @@
     },
     {
       "speaker": "Presenter",
-      "text": "This is a reference beat.",
+      "text": "This is a image reference beat.",
       "duration": 0.5,
       "image": {
         "type": "beat",
@@ -81,6 +81,7 @@
       }
     },
     {
+      "id": "textSlide",
       "speaker": "Presenter",
       "text": "",
       "duration": 2,
@@ -94,6 +95,7 @@
     },
     {
       "speaker": "Presenter",
+      "id": "pingpongmov",
       "text": "This is a local movie with audio.",
       "image": {
         "type": "movie",
@@ -253,6 +255,24 @@
           "</footer>"
         ]
       }
+    },
+    {
+      "speaker": "Presenter",
+      "text": "This is a text slide reference beat.",
+      "duration": 0.5,
+      "image": {
+        "type": "beat",
+        "id": "textSlide"
+      }
+    },
+    {
+      "speaker": "Presenter",
+      "text": "This is a movie reference beat.",
+      "duration": 0.5,
+      "image": {
+        "type": "beat",
+        "id": "pingpongmov"
+      }
     }
   ]
 }