npm - mulmocast - Versions diffs - 1.2.11 → 1.2.12 - Mend

mulmocast 1.2.11 → 1.2.12

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (23)

package/lib/actions/audio.d.ts +3 -1
package/lib/actions/audio.js +43 -7
package/lib/actions/image_references.js +3 -3
package/lib/actions/translate.js +12 -3
package/lib/agents/image_genai_agent.js +47 -17
package/lib/index.common.d.ts +2 -0
package/lib/index.common.js +2 -0
package/lib/methods/mulmo_media_source.js +2 -3
package/lib/methods/mulmo_presentation_style.d.ts +5 -0
package/lib/methods/mulmo_presentation_style.js +5 -1
package/lib/methods/mulmo_script.d.ts +5 -0
package/lib/methods/mulmo_script.js +5 -1
package/lib/methods/mulmo_studio_context.d.ts +5 -1
package/lib/methods/mulmo_studio_context.js +5 -4
package/lib/types/agent.d.ts +3 -1
package/lib/utils/file.d.ts +1 -0
package/lib/utils/file.js +4 -0
package/lib/utils/provider2agent.js +1 -1
package/lib/utils/utils.d.ts +5 -0
package/lib/utils/utils.js +5 -1
package/package.json +1 -6
package/scripts/test/test_genai.json +8 -0
package/scripts/test/test_image_refs.json +10 -0

package/lib/actions/audio.d.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 import "dotenv/config";
 import { MulmoStudioContext, MulmoBeat, PublicAPIArgs } from "../types/index.js";
 export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
-export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, args?: PublicAPIArgs) => Promise<void>;
+export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, args?: PublicAPIArgs & {
+    langs: string[];
+}) => Promise<void>;
 export declare const audio: (context: MulmoStudioContext, args?: PublicAPIArgs) => Promise<MulmoStudioContext>;

package/lib/actions/audio.js CHANGED Viewed

@@ -40,9 +40,9 @@ export const getBeatAudioPath = (text, context, beat, lang) => {
     const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
     return getAudioPath(context, beat, audioFile);
 };
-const preprocessor = (namedInputs) => {
-    const { beat, studioBeat, multiLingual, context } = namedInputs;
-    const { lang } = context;
+const preprocessorAgent = (namedInputs) => {
+    const { beat, studioBeat, multiLingual, context, lang } = namedInputs;
+    // const { lang } = context;
     const text = localizedText(beat, multiLingual, lang);
     const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
     const audioPath = getBeatAudioPath(text, context, beat, lang);
@@ -68,13 +68,15 @@ const graph_tts = {
         multiLingual: {},
         context: {},
         __mapIndex: {},
+        lang: {},
         preprocessor: {
-            agent: preprocessor,
+            agent: preprocessorAgent,
             inputs: {
                 beat: ":beat",
                 studioBeat: ":studioBeat",
                 multiLingual: ":multiLingual",
                 context: ":context",
+                lang: ":lang",
             },
         },
         tts: {
@@ -103,6 +105,33 @@ const graph_tts = {
         },
     },
 };
+const graph_tts_map = {
+    version: 0.5,
+    concurrency: 8,
+    nodes: {
+        beat: {},
+        studioBeat: {},
+        multiLingual: {},
+        context: {},
+        __mapIndex: {},
+        langs: {},
+        map: {
+            agent: "mapAgent",
+            inputs: {
+                rows: ":langs",
+                beat: ":beat",
+                studioBeat: ":studioBeat",
+                multiLingual: ":multiLingual",
+                context: ":context",
+                __mapIndex: ":__mapIndex",
+            },
+            params: {
+                rowKey: "lang",
+            },
+            graph: graph_tts,
+        },
+    },
+};
 const graph_data = {
     version: 0.5,
     concurrency: 8,
@@ -119,6 +148,7 @@ const graph_data = {
                 studioBeat: ":context.studio.beats",
                 multiLingual: ":context.multiLingual",
                 context: ":context",
+                lang: ":context.lang",
             },
             params: {
                 rowKey: "beat",
@@ -188,7 +218,7 @@ const audioAgents = {
     combineAudioFilesAgent,
 };
 export const generateBeatAudio = async (index, context, args) => {
-    const { settings, callbacks } = args ?? {};
+    const { settings, callbacks, langs } = args ?? {};
     try {
         MulmoStudioContextMethods.setSessionState(context, "audio", true);
         const fileName = MulmoStudioContextMethods.getFileName(context);
@@ -199,12 +229,18 @@ export const generateBeatAudio = async (index, context, args) => {
         mkdir(audioSegmentDirPath);
         const config = settings2GraphAIConfig(settings);
         const taskManager = new TaskManager(getConcurrency(context));
-        const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager, config });
+        const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, { agentFilters, taskManager, config });
         graph.injectValue("__mapIndex", index);
         graph.injectValue("beat", context.studio.script.beats[index]);
         graph.injectValue("studioBeat", context.studio.beats[index]);
-        graph.injectValue("multiLingual", context.multiLingual);
+        graph.injectValue("multiLingual", context.multiLingual[index]);
         graph.injectValue("context", context);
+        if (langs) {
+            graph.injectValue("langs", langs);
+        }
+        else {
+            graph.injectValue("lang", context.lang);
+        }
         if (callbacks) {
             callbacks.forEach((callback) => {
                 graph.registerCallback(callback);

package/lib/actions/image_references.js CHANGED Viewed

@@ -1,9 +1,9 @@
 import fs from "fs";
 import { GraphAI, GraphAILogger } from "graphai";
-import { getReferenceImagePath } from "../utils/file.js";
+import { getReferenceImagePath, resolveAssetPath } from "../utils/file.js";
 import { getExtention } from "../utils/utils.js";
 import { graphOption } from "./images.js";
-import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent } from "../agents/index.js";
 // public api
 // Application may call this function directly to generate reference image.
@@ -70,7 +70,7 @@ export const getImageRefs = async (context) => {
         }
         else if (image.type === "image") {
             if (image.source.kind === "path") {
-                imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
+                imageRefs[key] = resolveAssetPath(context, image.source.path);
             }
             else if (image.source.kind === "url") {
                 imageRefs[key] = await downLoadImage(context, key, image.source.url);

package/lib/actions/translate.js CHANGED Viewed

@@ -144,10 +144,18 @@ const translateGraph = {
         mergeStudioResult: {
             isResult: true,
             agent: (namedInputs) => {
-                const { multiLingual, beats } = namedInputs;
+                const { multiLingual, beats, originalMultiLingual } = namedInputs;
                 const multiLingualObject = beats.reduce((tmp, beat, beatIndex) => {
                     const key = beatId(beat?.id, beatIndex);
-                    tmp[key] = multiLingual[beatIndex];
+                    const originalData = originalMultiLingual[beatIndex]?.multiLingualTexts ?? {};
+                    const { multiLingualTexts, cacheKey } = multiLingual[beatIndex];
+                    tmp[key] = {
+                        cacheKey,
+                        multiLingualTexts: {
+                            ...originalData,
+                            ...multiLingualTexts,
+                        },
+                    };
                     return tmp;
                 }, {});
                 return {
@@ -156,7 +164,8 @@ const translateGraph = {
                 };
             },
             inputs: {
-                multiLingual: ":beatsMap.mergeMultiLingualData",
+                originalMultiLingual: ":context.multiLingual", // original
+                multiLingual: ":beatsMap.mergeMultiLingualData", // update
                 beats: ":context.studio.script.beats",
             },
         },

package/lib/agents/image_genai_agent.js CHANGED Viewed

@@ -1,9 +1,10 @@
+import fs from "fs";
 import { GraphAILogger } from "graphai";
 import { getAspectRatio } from "./movie_google_agent.js";
 import { provider2ImageAgent } from "../utils/provider2agent.js";
 import { GoogleGenAI, PersonGeneration } from "@google/genai";
 export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
-    const { prompt } = namedInputs;
+    const { prompt, referenceImages } = namedInputs;
     const aspectRatio = getAspectRatio(params.canvasSize);
     const model = params.model ?? provider2ImageAgent["google"].defaultModel;
     const apiKey = config?.apiKey;
@@ -12,24 +13,53 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
     }
     try {
         const ai = new GoogleGenAI({ apiKey });
-        const response = await ai.models.generateImages({
-            model,
-            prompt,
-            config: {
-                numberOfImages: 1, // default is 4!
-                aspectRatio,
-                personGeneration: PersonGeneration.ALLOW_ALL,
-                // safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
-            },
-        });
-        if (!response.generatedImages || response.generatedImages.length === 0) {
-            throw new Error("ERROR: generateImage returned no generated images");
+        if (model === "gemini-2.5-flash-image-preview") {
+            const contents = [{ text: prompt }];
+            referenceImages?.forEach((imagePath) => {
+                const imageData = fs.readFileSync(imagePath);
+                const base64Image = imageData.toString("base64");
+                contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
+            });
+            // NOTE: There is no way to specify the aspect ratio for Gemini.
+            const response = await ai.models.generateContent({ model, contents });
+            if (!response.candidates?.[0]?.content?.parts) {
+                throw new Error("ERROR: generateContent returned no candidates");
+            }
+            for (const part of response.candidates[0].content.parts) {
+                if (part.text) {
+                    GraphAILogger.info("Gemini image generation response:", part.text);
+                }
+                else if (part.inlineData) {
+                    const imageData = part.inlineData.data;
+                    if (!imageData) {
+                        throw new Error("ERROR: generateContent returned no image data");
+                    }
+                    const buffer = Buffer.from(imageData, "base64");
+                    return { buffer };
+                }
+            }
+            throw new Error("ERROR: generateContent returned no image data");
         }
-        const image = response.generatedImages[0].image;
-        if (image && image.imageBytes) {
-            return { buffer: Buffer.from(image.imageBytes, "base64") };
+        else {
+            const response = await ai.models.generateImages({
+                model,
+                prompt,
+                config: {
+                    numberOfImages: 1, // default is 4!
+                    aspectRatio,
+                    personGeneration: PersonGeneration.ALLOW_ALL,
+                    // safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
+                },
+            });
+            if (!response.generatedImages || response.generatedImages.length === 0) {
+                throw new Error("ERROR: generateImage returned no generated images");
+            }
+            const image = response.generatedImages[0].image;
+            if (image && image.imageBytes) {
+                return { buffer: Buffer.from(image.imageBytes, "base64") };
+            }
+            throw new Error("ERROR: generateImage returned no image bytes");
         }
-        throw new Error("ERROR: generateImage returned no image bytes");
     }
     catch (error) {
         GraphAILogger.info("Failed to generate image:", error);

package/lib/index.common.d.ts CHANGED Viewed

@@ -3,5 +3,7 @@ export * from "./utils/provider2agent.js";
 export * from "./utils/const.js";
 export * from "./utils/string.js";
 export * from "./utils/utils.js";
+export * from "./utils/prompt.js";
 export * from "./methods/mulmo_presentation_style.js";
 export * from "./methods/mulmo_script.js";
+export * from "./methods/mulmo_studio_context.js";

package/lib/index.common.js CHANGED Viewed

@@ -4,5 +4,7 @@ export * from "./utils/provider2agent.js";
 export * from "./utils/const.js";
 export * from "./utils/string.js";
 export * from "./utils/utils.js";
+export * from "./utils/prompt.js";
 export * from "./methods/mulmo_presentation_style.js";
 export * from "./methods/mulmo_script.js";
+export * from "./methods/mulmo_studio_context.js";

package/lib/methods/mulmo_media_source.js CHANGED Viewed

@@ -1,6 +1,5 @@
 import fs from "fs";
-import { getFullPath } from "../utils/file.js";
-import { MulmoStudioContextMethods } from "../methods/index.js";
+import { getFullPath, resolveAssetPath } from "../utils/file.js";
 export const MulmoMediaSourceMethods = {
     async getText(mediaSource, context) {
         if (mediaSource.kind === "text") {
@@ -23,7 +22,7 @@ export const MulmoMediaSourceMethods = {
         if (!mediaSource)
             return null;
         if (mediaSource.kind === "path") {
-            return MulmoStudioContextMethods.resolveAssetPath(context, mediaSource.path);
+            return resolveAssetPath(context, mediaSource.path);
         }
         if (mediaSource.kind === "url") {
             return mediaSource.url;

package/lib/methods/mulmo_presentation_style.d.ts CHANGED Viewed

@@ -1,3 +1,8 @@
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
 export declare const MulmoPresentationStyleMethods: {
     getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;

package/lib/methods/mulmo_presentation_style.js CHANGED Viewed

@@ -1,4 +1,8 @@
-// node & browser
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { isNull } from "graphai";
 import { userAssert } from "../utils/utils.js";
 import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";

package/lib/methods/mulmo_script.d.ts CHANGED Viewed

@@ -1,3 +1,8 @@
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { type MulmoStudioBeat, type MulmoScript, type MulmoStudioMultiLingual } from "../types/index.js";
 export declare const MulmoScriptMethods: {
     validate(script: any): MulmoScript;

package/lib/methods/mulmo_script.js CHANGED Viewed

@@ -1,4 +1,8 @@
-// node & browser
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { GraphAILogger } from "graphai";
 import { mulmoScriptSchema, mulmoStudioMultiLingualFileSchema } from "../types/index.js";
 import { beatId } from "../utils/utils.js";

package/lib/methods/mulmo_studio_context.d.ts CHANGED Viewed

@@ -1,8 +1,12 @@
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType } from "../types/index.js";
 export declare const addSessionProgressCallback: (cb: SessionProgressCallback) => void;
 export declare const removeSessionProgressCallback: (cb: SessionProgressCallback) => void;
 export declare const MulmoStudioContextMethods: {
-    resolveAssetPath(context: MulmoStudioContext, relativePath: string): string;
     getAudioDirPath(context: MulmoStudioContext): string;
     getImageDirPath(context: MulmoStudioContext): string;
     getImageProjectDirPath(context: MulmoStudioContext): string;

package/lib/methods/mulmo_studio_context.js CHANGED Viewed

@@ -1,4 +1,8 @@
-import path from "path";
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { beatId } from "../utils/utils.js";
 import { GraphAILogger } from "graphai";
 const sessionProgressCallbacks = new Set();
@@ -25,9 +29,6 @@ const notifyBeatStateChange = (context, sessionType, id) => {
     }
 };
 export const MulmoStudioContextMethods = {
-    resolveAssetPath(context, relativePath) {
-        return path.resolve(context.fileDirs.mulmoFileDirPath, relativePath);
-    },
     getAudioDirPath(context) {
         return context.fileDirs.audioDirPath;
     },

package/lib/types/agent.d.ts CHANGED Viewed

@@ -27,7 +27,9 @@ export type AgentErrorResult = {
 export type AgentConfig = {
     apiKey?: string;
 };
-export type ImageAgentInputs = AgentPromptInputs;
+export type ImageAgentInputs = AgentPromptInputs & {
+    referenceImages: string[] | null | undefined;
+};
 export type OpenAIImageAgentInputs = AgentPromptInputs & {
     referenceImages: string[] | null | undefined;
 };

package/lib/utils/file.d.ts CHANGED Viewed

@@ -38,6 +38,7 @@ export declare const getCaptionImagePath: (context: MulmoStudioContext, index: n
 export declare const getOutputPdfFilePath: (outDirPath: string, fileName: string, pdfMode: PDFMode, lang?: string) => string;
 export declare const getPromptTemplateFilePath: (promptTemplateName: string) => string;
 export declare const mkdir: (dirPath: string) => void;
+export declare const resolveAssetPath: (context: MulmoStudioContext, relativePath: string) => string;
 export declare const silent60secPath: () => string;
 export declare const defaultBGMPath: () => string;
 export declare const mulmoCreditPath: () => string;

package/lib/utils/file.js CHANGED Viewed

@@ -128,6 +128,10 @@ export const mkdir = (dirPath) => {
         fs.mkdirSync(dirPath, { recursive: true });
     }
 };
+// asset path
+export const resolveAssetPath = (context, relativePath) => {
+    return path.resolve(context.fileDirs.mulmoFileDirPath, relativePath);
+};
 // export const silentPath = path.resolve(npmRoot, "./assets/audio/silent300.mp3");
 // export const silentLastPath = path.resolve(npmRoot, "./assets/audio/silent800.mp3");
 export const silent60secPath = () => path.resolve(npmRoot, "./assets/audio/silent60sec.mp3");

package/lib/utils/provider2agent.js CHANGED Viewed

@@ -38,7 +38,7 @@ export const provider2ImageAgent = {
     google: {
         agentName: "imageGenAIAgent",
         defaultModel: "imagen-4.0-generate-preview-06-06",
-        models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06"],
+        models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image-preview"],
     },
     mock: {
         agentName: "mediaMockAgent",

package/lib/utils/utils.d.ts CHANGED Viewed

@@ -1,3 +1,8 @@
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import type { ConfigDataDictionary, DefaultConfigData } from "graphai";
 import { MulmoBeat, MulmoStudioBeat, MulmoStudioMultiLingual, MulmoStudioMultiLingualData } from "../types/index.js";
 import { type LLM } from "./provider2agent.js";

package/lib/utils/utils.js CHANGED Viewed

@@ -1,4 +1,8 @@
-// node & browser
+/**
+ * Browser-friendly packages only.
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
+ * Works in both Node.js and modern browsers.
+ */
 import { provider2LLMAgent } from "./provider2agent.js";
 export const llmPair = (_llm, _model) => {
     const llmKey = _llm ?? "openai";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mulmocast",
-  "version": "1.2.11",
+  "version": "1.2.12",
   "description": "",
   "type": "module",
   "main": "lib/index.node.js",
@@ -55,7 +55,6 @@
     "format": "prettier --write '{src,scripts,assets/templates,assets/styles,draft,ideason,scripts_mag2,proto,test,batch,graphai,output,docs/scripts}/**/*.{ts,json,yaml}'",
     "deep_research": "npx tsx ./src/tools/deep_research.ts",
     "template": "npx tsx batch/template2tsobject.ts && yarn run format",
-    "fake_data": "npx tsx test/fake/sample.ts",
     "mcp_server": "npx tsx ./src/mcp/server.ts"
   },
   "repository": "git+ssh://git@github.com/receptron/mulmocast-cli.git",
@@ -81,7 +80,6 @@
     "@inquirer/select": "^4.3.2",
     "@modelcontextprotocol/sdk": "^1.17.4",
     "@tavily/core": "^0.5.11",
-    "canvas": "^3.2.0",
     "clipboardy": "^4.0.0",
     "dotenv": "^17.2.1",
     "fluent-ffmpeg": "^2.1.3",
@@ -96,8 +94,6 @@
     "zod-to-json-schema": "^3.24.6"
   },
   "devDependencies": {
-    "@anatine/zod-mock": "^3.14.0",
-    "@faker-js/faker": "^9.9.0",
     "@receptron/test_utils": "^2.0.3",
     "@types/fluent-ffmpeg": "^2.1.26",
     "@types/yargs": "^17.0.33",
@@ -106,7 +102,6 @@
     "eslint-plugin-prettier": "^5.5.4",
     "eslint-plugin-sonarjs": "^3.0.5",
     "prettier": "^3.6.2",
-    "ts-node": "^10.9.2",
     "tsx": "^4.20.5",
     "typescript": "^5.9.2",
     "typescript-eslint": "^8.41.0"

package/scripts/test/test_genai.json CHANGED Viewed

@@ -9,6 +9,14 @@
   },
   "lang": "en",
   "beats": [
+    {
+      "id": "gemini_2_5_flash_image_preview",
+      "text": "image generated by gemini-2.5-flash-image-preview",
+      "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
+      "imageParams": {
+        "model": "gemini-2.5-flash-image-preview"
+      }
+    },
     {
       "id": "imagen_3",
       "text": "image generated by imagen-3",

package/scripts/test/test_image_refs.json CHANGED Viewed

@@ -45,6 +45,16 @@
       "text": "Hello World with no reference image",
       "imagePrompt": "Saying hello to the world",
       "imageNames": []
+    },
+    {
+      "id": "gemini_2_5_flash_image_preview",
+      "text": "Hello World with a witch and a broom with Gemini",
+      "imagePrompt": "Saying hello to the world",
+      "imageNames": ["witch", "broom"],
+      "imageParams": {
+        "provider": "google",
+        "model": "gemini-2.5-flash-image-preview"
+      }
     }
   ]
 }