mulmocast 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/lib/actions/audio.js +10 -1
  2. package/lib/actions/image_agents.d.ts +3 -12
  3. package/lib/actions/image_agents.js +12 -8
  4. package/lib/actions/images.js +2 -1
  5. package/lib/actions/translate.d.ts +51 -2
  6. package/lib/actions/translate.js +193 -148
  7. package/lib/agents/combine_audio_files_agent.js +1 -1
  8. package/lib/agents/lipsync_replicate_agent.js +10 -3
  9. package/lib/agents/tts_nijivoice_agent.js +1 -1
  10. package/lib/cli/commands/audio/handler.js +1 -1
  11. package/lib/cli/commands/image/handler.js +1 -1
  12. package/lib/cli/commands/movie/handler.js +1 -1
  13. package/lib/cli/commands/pdf/handler.js +1 -1
  14. package/lib/cli/helpers.d.ts +1 -4
  15. package/lib/cli/helpers.js +3 -2
  16. package/lib/index.common.d.ts +1 -0
  17. package/lib/index.common.js +1 -0
  18. package/lib/mcp/server.js +1 -1
  19. package/lib/methods/mulmo_presentation_style.d.ts +3 -2
  20. package/lib/methods/mulmo_script.d.ts +4 -1
  21. package/lib/methods/mulmo_script.js +18 -2
  22. package/lib/methods/mulmo_studio_context.d.ts +1 -0
  23. package/lib/methods/mulmo_studio_context.js +8 -0
  24. package/lib/types/agent.d.ts +1 -0
  25. package/lib/types/schema.d.ts +326 -230
  26. package/lib/types/schema.js +10 -3
  27. package/lib/types/type.d.ts +3 -2
  28. package/lib/utils/const.d.ts +1 -0
  29. package/lib/utils/const.js +2 -1
  30. package/lib/utils/context.d.ts +393 -50
  31. package/lib/utils/context.js +90 -57
  32. package/lib/utils/filters.d.ts +1 -0
  33. package/lib/utils/filters.js +8 -0
  34. package/lib/utils/image_plugins/mermaid.js +1 -1
  35. package/lib/utils/image_plugins/source.js +1 -1
  36. package/lib/utils/preprocess.d.ts +2 -2
  37. package/lib/utils/preprocess.js +3 -3
  38. package/lib/utils/provider2agent.d.ts +3 -2
  39. package/lib/utils/provider2agent.js +20 -2
  40. package/lib/utils/string.d.ts +1 -1
  41. package/lib/utils/string.js +12 -8
  42. package/lib/utils/utils.js +2 -6
  43. package/package.json +2 -2
  44. package/scripts/templates/image_refs.json +1 -0
  45. package/scripts/templates/voice_over.json +1 -0
  46. package/scripts/test/gpt.json +1 -0
  47. package/scripts/test/test1.json +1 -0
  48. package/scripts/test/test_audio.json +1 -0
  49. package/scripts/test/test_audio_instructions.json +1 -0
  50. package/scripts/test/test_beats.json +1 -0
  51. package/scripts/test/test_captions.json +1 -0
  52. package/scripts/test/test_elevenlabs_models.json +1 -0
  53. package/scripts/test/test_hello.json +1 -0
  54. package/scripts/test/test_hello_google.json +1 -0
  55. package/scripts/test/test_html.json +1 -0
  56. package/scripts/test/test_image_refs.json +1 -0
  57. package/scripts/test/test_images.json +1 -0
  58. package/scripts/test/test_lang.json +58 -2
  59. package/scripts/test/test_layout.json +1 -0
  60. package/scripts/test/test_lipsync.json +9 -0
  61. package/scripts/test/test_loop.json +1 -0
  62. package/scripts/test/test_media.json +1 -0
  63. package/scripts/test/test_mixed_providers.json +1 -0
  64. package/scripts/test/test_movie.json +1 -0
  65. package/scripts/test/test_no_audio.json +1 -0
  66. package/scripts/test/test_no_audio_with_credit.json +1 -0
  67. package/scripts/test/test_order.json +1 -0
  68. package/scripts/test/test_order_portrait.json +1 -0
  69. package/scripts/test/test_replicate.json +19 -0
  70. package/scripts/test/test_slideout_left_no_audio.json +1 -0
  71. package/scripts/test/test_spillover.json +1 -0
  72. package/scripts/test/test_transition.json +1 -0
  73. package/scripts/test/test_transition_no_audio.json +1 -0
  74. package/scripts/test/test_video_speed.json +1 -0
  75. package/scripts/test/test_voice_over.json +1 -0
  76. package/scripts/test/test_voices.json +1 -0
  77. package/scripts/templates/image_prompt_only_template.ts +0 -95
@@ -10,7 +10,7 @@ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
12
  import { text2SpeechProviderSchema } from "../types/index.js";
13
- import { fileCacheAgentFilter } from "../utils/filters.js";
13
+ import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
14
14
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
15
15
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
16
16
  import { provider2TTSAgent } from "../utils/provider2agent.js";
@@ -58,6 +58,8 @@ const preprocessor = (namedInputs) => {
58
58
  voiceId,
59
59
  speechOptions,
60
60
  model,
61
+ provider,
62
+ lang,
61
63
  audioPath,
62
64
  studioBeat,
63
65
  needsTTS,
@@ -84,6 +86,8 @@ const graph_tts = {
84
86
  agent: ":preprocessor.ttsAgent",
85
87
  inputs: {
86
88
  text: ":preprocessor.text",
89
+ provider: ":preprocessor.provider",
90
+ lang: ":preprocessor.lang",
87
91
  cache: {
88
92
  force: [":context.force"],
89
93
  file: ":preprocessor.audioPath",
@@ -173,6 +177,11 @@ const agentFilters = [
173
177
  agent: fileCacheAgentFilter,
174
178
  nodeIds: ["tts"],
175
179
  },
180
+ {
181
+ name: "nijovoiceTextAgentFilter",
182
+ agent: nijovoiceTextAgentFilter,
183
+ nodeIds: ["tts"],
184
+ },
176
185
  ];
177
186
  const getConcurrency = (context) => {
178
187
  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
@@ -23,10 +23,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
23
23
  };
24
24
  lipSyncFile?: string;
25
25
  lipSyncModel?: string;
26
- lipSyncAgentInfo?: {
27
- agentName: string;
28
- defaultModel: string;
29
- };
26
+ lipSyncAgentName?: string;
30
27
  audioFile?: string;
31
28
  beatDuration?: number;
32
29
  htmlPrompt?: undefined;
@@ -61,10 +58,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
61
58
  };
62
59
  lipSyncFile?: string;
63
60
  lipSyncModel?: string;
64
- lipSyncAgentInfo?: {
65
- agentName: string;
66
- defaultModel: string;
67
- };
61
+ lipSyncAgentName?: string;
68
62
  audioFile?: string;
69
63
  beatDuration?: number;
70
64
  htmlPrompt?: undefined;
@@ -102,10 +96,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
102
96
  };
103
97
  lipSyncFile?: string;
104
98
  lipSyncModel?: string;
105
- lipSyncAgentInfo?: {
106
- agentName: string;
107
- defaultModel: string;
108
- };
99
+ lipSyncAgentName?: string;
109
100
  audioFile?: string;
110
101
  beatDuration?: number;
111
102
  htmlPrompt?: undefined;
@@ -25,16 +25,20 @@ export const imagePreprocessAgent = async (namedInputs) => {
25
25
  movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
26
26
  beatDuration: beat.duration ?? studioBeat?.duration,
27
27
  };
28
- if (beat.soundEffectPrompt) {
29
- returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
30
- returnValue.soundEffectModel =
31
- beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
32
- returnValue.soundEffectFile = moviePaths.soundEffectFile;
33
- returnValue.soundEffectPrompt = beat.soundEffectPrompt;
28
+ const isMovie = Boolean(beat.moviePrompt || beat?.image?.type === "movie");
29
+ if (isMovie) {
30
+ if (beat.soundEffectPrompt) {
31
+ returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
32
+ returnValue.soundEffectModel =
33
+ beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
34
+ returnValue.soundEffectFile = moviePaths.soundEffectFile;
35
+ returnValue.soundEffectPrompt = beat.soundEffectPrompt;
36
+ }
34
37
  }
35
38
  if (beat.enableLipSync) {
36
- returnValue.lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
37
- returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? returnValue.lipSyncAgentInfo.defaultModel;
39
+ const lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
40
+ returnValue.lipSyncAgentName = lipSyncAgentInfo.agentName;
41
+ returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? lipSyncAgentInfo.defaultModel;
38
42
  returnValue.lipSyncFile = moviePaths.lipSyncFile;
39
43
  // Audio file will be set from the beat's audio file when available
40
44
  returnValue.audioFile = studioBeat?.audioFile;
@@ -218,10 +218,11 @@ const beat_graph_data = {
218
218
  },
219
219
  lipSyncGenerator: {
220
220
  if: ":beat.enableLipSync",
221
- agent: ":preprocessor.lipSyncAgentInfo.agentName",
221
+ agent: ":preprocessor.lipSyncAgentName",
222
222
  inputs: {
223
223
  onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
224
224
  movieFile: ":preprocessor.movieFile",
225
+ imageFile: ":preprocessor.referenceImageForMovie",
225
226
  audioFile: ":preprocessor.audioFile",
226
227
  lipSyncFile: ":preprocessor.lipSyncFile",
227
228
  params: {
@@ -1,7 +1,56 @@
1
1
  import "dotenv/config";
2
2
  import type { CallbackFunction } from "graphai";
3
- import { MulmoStudioContext } from "../types/index.js";
3
+ import { LANG, LocalizedText, MulmoStudioContext } from "../types/index.js";
4
+ export declare const translateTextGraph: {
5
+ version: number;
6
+ nodes: {
7
+ localizedText: {
8
+ inputs: {
9
+ targetLang: string;
10
+ beat: string;
11
+ multiLingual: string;
12
+ lang: string;
13
+ beatIndex: string;
14
+ mulmoContext: string;
15
+ system: string;
16
+ prompt: string[];
17
+ };
18
+ passThrough: {
19
+ lang: string;
20
+ };
21
+ output: {
22
+ text: string;
23
+ };
24
+ agent: string;
25
+ };
26
+ splitText: {
27
+ agent: (namedInputs: {
28
+ localizedText: LocalizedText;
29
+ targetLang: LANG;
30
+ }) => string[];
31
+ inputs: {
32
+ targetLang: string;
33
+ localizedText: string;
34
+ };
35
+ };
36
+ textTranslateResult: {
37
+ isResult: boolean;
38
+ agent: string;
39
+ inputs: {
40
+ lang: string;
41
+ text: string;
42
+ texts: string;
43
+ ttsTexts: string;
44
+ cacheKey: string;
45
+ };
46
+ };
47
+ };
48
+ };
49
+ export declare const translateBeat: (index: number, context: MulmoStudioContext, targetLangs: string[], args?: {
50
+ settings?: Record<string, string>;
51
+ callbacks?: CallbackFunction[];
52
+ }) => Promise<void>;
4
53
  export declare const translate: (context: MulmoStudioContext, args?: {
5
54
  callbacks?: CallbackFunction[];
6
55
  settings?: Record<string, string>;
7
- }) => Promise<void>;
56
+ }) => Promise<MulmoStudioContext>;
@@ -1,34 +1,165 @@
1
1
  import "dotenv/config";
2
- import { GraphAI, assert } from "graphai";
2
+ import { createHash } from "crypto";
3
+ import fs from "fs";
4
+ import { GraphAI, assert, isNull, GraphAILogger } from "graphai";
3
5
  import * as agents from "@graphai/vanilla";
4
6
  import { openAIAgent } from "@graphai/openai_agent";
5
7
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
6
- import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
8
+ import { recursiveSplitJa } from "../utils/string.js";
7
9
  import { settings2GraphAIConfig } from "../utils/utils.js";
10
+ import { getMultiLingual } from "../utils/context.js";
11
+ import { currentMulmoScriptVersion } from "../utils/const.js";
8
12
  import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
9
13
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
10
14
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
11
15
  const vanillaAgents = agents.default ?? agents;
12
- const translateGraph = {
16
+ const hashSHA256 = (text) => {
17
+ return createHash("sha256").update(text, "utf8").digest("hex");
18
+ };
19
+ // 1. translateGraph / map each beats.
20
+ // 2. beatGraph / map each target lang.
21
+ // 3. translateTextGraph / translate text.
22
+ export const translateTextGraph = {
13
23
  version: 0.5,
14
24
  nodes: {
25
+ localizedText: {
26
+ inputs: {
27
+ targetLang: ":targetLang", // for cache
28
+ beat: ":beat", // for cache
29
+ multiLingual: ":multiLingual", // for cache
30
+ lang: ":lang", // for cache
31
+ beatIndex: ":beatIndex", // for cache (state)
32
+ mulmoContext: ":context", // for cache (state)
33
+ system: translateSystemPrompt,
34
+ prompt: translatePrompts,
35
+ },
36
+ passThrough: {
37
+ lang: ":targetLang",
38
+ },
39
+ output: {
40
+ text: ".text",
41
+ },
42
+ // return { lang, text } <- localizedText
43
+ agent: "openAIAgent",
44
+ },
45
+ splitText: {
46
+ agent: (namedInputs) => {
47
+ const { localizedText, targetLang } = namedInputs;
48
+ // Cache
49
+ if (localizedText.texts) {
50
+ return localizedText.texts;
51
+ }
52
+ if (targetLang === "ja") {
53
+ return recursiveSplitJa(localizedText.text);
54
+ }
55
+ // not split
56
+ return [localizedText.text];
57
+ },
58
+ inputs: {
59
+ targetLang: ":targetLang",
60
+ localizedText: ":localizedText",
61
+ },
62
+ },
63
+ textTranslateResult: {
64
+ isResult: true,
65
+ agent: "copyAgent",
66
+ inputs: {
67
+ lang: ":targetLang",
68
+ text: ":localizedText.text",
69
+ texts: ":splitText",
70
+ ttsTexts: ":splitText",
71
+ cacheKey: ":multiLingual.cacheKey",
72
+ },
73
+ },
74
+ },
75
+ };
76
+ const beatGraph = {
77
+ version: 0.5,
78
+ nodes: {
79
+ targetLangs: {},
15
80
  context: {},
16
- defaultLang: {},
17
- outDirPath: {},
18
- outputMultilingualFilePath: {},
19
- lang: {
20
- agent: "stringUpdateTextAgent",
81
+ beat: {},
82
+ __mapIndex: {},
83
+ // for cache
84
+ multiLingual: {
85
+ agent: (namedInputs) => {
86
+ const { multiLinguals, beatIndex, text } = namedInputs;
87
+ const cacheKey = hashSHA256(text ?? "");
88
+ const multiLingual = multiLinguals?.[beatIndex];
89
+ if (!multiLingual) {
90
+ return { cacheKey, multiLingualTexts: {} };
91
+ }
92
+ return {
93
+ multiLingualTexts: Object.keys(multiLingual.multiLingualTexts).reduce((tmp, lang) => {
94
+ if (multiLingual.multiLingualTexts[lang].cacheKey === cacheKey) {
95
+ tmp[lang] = multiLingual.multiLingualTexts[lang];
96
+ }
97
+ return tmp;
98
+ }, {}),
99
+ cacheKey,
100
+ };
101
+ },
21
102
  inputs: {
22
- newText: ":context.studio.script.lang",
23
- oldText: ":defaultLang",
103
+ text: ":beat.text",
104
+ beatIndex: ":__mapIndex",
105
+ multiLinguals: ":context.multiLingual",
24
106
  },
25
107
  },
26
- targetLangs: {}, // TODO
27
- mergeStudioResult: {
108
+ preprocessMultiLingual: {
109
+ agent: "mapAgent",
110
+ inputs: {
111
+ beat: ":beat",
112
+ multiLingual: ":multiLingual",
113
+ rows: ":targetLangs",
114
+ lang: ":context.studio.script.lang",
115
+ context: ":context",
116
+ beatIndex: ":__mapIndex",
117
+ },
118
+ params: {
119
+ compositeResult: true,
120
+ rowKey: "targetLang",
121
+ },
122
+ graph: translateTextGraph,
123
+ },
124
+ mergeLocalizedText: {
125
+ // console: { after: true},
126
+ agent: "arrayToObjectAgent",
127
+ inputs: {
128
+ items: ":preprocessMultiLingual.textTranslateResult",
129
+ },
130
+ params: {
131
+ key: "lang",
132
+ },
133
+ },
134
+ multiLingualTexts: {
135
+ agent: "mergeObjectAgent",
136
+ inputs: {
137
+ items: [":multiLingual.multiLingualTexts", ":mergeLocalizedText"],
138
+ },
139
+ },
140
+ mergeMultiLingualData: {
28
141
  isResult: true,
142
+ // console: { after: true},
29
143
  agent: "mergeObjectAgent",
30
144
  inputs: {
31
- items: [{ multiLingual: ":beatsMap.mergeMultiLingualData" }],
145
+ items: [":multiLingual", { multiLingualTexts: ":multiLingualTexts" }],
146
+ },
147
+ },
148
+ },
149
+ };
150
+ const translateGraph = {
151
+ version: 0.5,
152
+ nodes: {
153
+ context: {},
154
+ outDirPath: {},
155
+ outputMultilingualFilePath: {},
156
+ targetLangs: {},
157
+ mergeStudioResult: {
158
+ isResult: true,
159
+ agent: "copyAgent",
160
+ inputs: {
161
+ version: "1.1",
162
+ multiLingual: ":beatsMap.mergeMultiLingualData",
32
163
  },
33
164
  },
34
165
  beatsMap: {
@@ -37,139 +168,18 @@ const translateGraph = {
37
168
  targetLangs: ":targetLangs",
38
169
  context: ":context",
39
170
  rows: ":context.studio.script.beats",
40
- lang: ":lang",
41
171
  },
42
172
  params: {
43
173
  rowKey: "beat",
44
174
  compositeResult: true,
45
175
  },
46
- graph: {
47
- version: 0.5,
48
- nodes: {
49
- // for cache
50
- multiLingual: {
51
- agent: (namedInputs) => {
52
- return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
53
- },
54
- inputs: {
55
- index: ":__mapIndex",
56
- rows: ":context.multiLingual",
57
- },
58
- },
59
- preprocessMultiLingual: {
60
- agent: "mapAgent",
61
- inputs: {
62
- beat: ":beat",
63
- multiLingual: ":multiLingual",
64
- rows: ":targetLangs",
65
- lang: ":lang.text",
66
- context: ":context",
67
- beatIndex: ":__mapIndex",
68
- },
69
- params: {
70
- compositeResult: true,
71
- rowKey: "targetLang",
72
- },
73
- graph: {
74
- version: 0.5,
75
- nodes: {
76
- localizedTexts: {
77
- inputs: {
78
- targetLang: ":targetLang", // for cache
79
- beat: ":beat", // for cache
80
- multiLingual: ":multiLingual", // for cache
81
- lang: ":lang", // for cache
82
- beatIndex: ":beatIndex", // for cache
83
- mulmoContext: ":context", // for cache
84
- system: translateSystemPrompt,
85
- prompt: translatePrompts,
86
- },
87
- passThrough: {
88
- lang: ":targetLang",
89
- },
90
- output: {
91
- text: ".text",
92
- },
93
- // return { lang, text } <- localizedText
94
- agent: "openAIAgent",
95
- },
96
- splitText: {
97
- agent: (namedInputs) => {
98
- const { localizedText, targetLang } = namedInputs;
99
- // Cache
100
- if (localizedText.texts) {
101
- return localizedText;
102
- }
103
- if (targetLang === "ja") {
104
- return {
105
- ...localizedText,
106
- texts: recursiveSplitJa(localizedText.text),
107
- };
108
- }
109
- // not split
110
- return {
111
- ...localizedText,
112
- texts: [localizedText.text],
113
- };
114
- // return { lang, text, texts }
115
- },
116
- inputs: {
117
- targetLang: ":targetLang",
118
- localizedText: ":localizedTexts",
119
- },
120
- },
121
- ttsTexts: {
122
- agent: (namedInputs) => {
123
- const { localizedText, targetLang } = namedInputs;
124
- // cache
125
- if (localizedText.ttsTexts) {
126
- return localizedText;
127
- }
128
- if (targetLang === "ja") {
129
- return {
130
- ...localizedText,
131
- ttsTexts: localizedText?.texts?.map((text) => replacePairsJa(text, replacementsJa)),
132
- };
133
- }
134
- return {
135
- ...localizedText,
136
- ttsTexts: localizedText.texts,
137
- };
138
- },
139
- inputs: {
140
- targetLang: ":targetLang",
141
- localizedText: ":splitText",
142
- },
143
- isResult: true,
144
- },
145
- },
146
- },
147
- },
148
- mergeLocalizedText: {
149
- agent: "arrayToObjectAgent",
150
- inputs: {
151
- items: ":preprocessMultiLingual.ttsTexts",
152
- },
153
- params: {
154
- key: "lang",
155
- },
156
- },
157
- mergeMultiLingualData: {
158
- isResult: true,
159
- agent: "mergeObjectAgent",
160
- inputs: {
161
- items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
162
- },
163
- },
164
- },
165
- },
176
+ graph: beatGraph,
166
177
  },
167
178
  writeOutput: {
168
- // console: { before: true },
169
179
  agent: "fileWriteAgent",
170
180
  inputs: {
171
181
  file: ":outputMultilingualFilePath",
172
- text: ":mergeStudioResult.multiLingual.toJSON()",
182
+ text: ":mergeStudioResult.toJSON()",
173
183
  },
174
184
  },
175
185
  },
@@ -180,18 +190,14 @@ const localizedTextCacheAgentFilter = async (context, next) => {
180
190
  if (!beat.text) {
181
191
  return { text: "" };
182
192
  }
183
- // The original text is unchanged and the target language text is present
184
- if (multiLingual.multiLingualTexts &&
185
- multiLingual.multiLingualTexts[lang] &&
186
- multiLingual.multiLingualTexts[lang].text === beat.text &&
187
- multiLingual.multiLingualTexts[targetLang] &&
188
- multiLingual.multiLingualTexts[targetLang].text) {
189
- return { text: multiLingual.multiLingualTexts[targetLang].text };
190
- }
191
193
  // same language
192
194
  if (targetLang === lang) {
193
195
  return { text: beat.text };
194
196
  }
197
+ // The original text is unchanged and the target language text is present
198
+ if (multiLingual.cacheKey === multiLingual.multiLingualTexts[targetLang]?.cacheKey) {
199
+ return { text: multiLingual.multiLingualTexts[targetLang].text };
200
+ }
195
201
  try {
196
202
  MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
197
203
  return await next(context);
@@ -204,11 +210,49 @@ const agentFilters = [
204
210
  {
205
211
  name: "localizedTextCacheAgentFilter",
206
212
  agent: localizedTextCacheAgentFilter,
207
- nodeIds: ["localizedTexts"],
213
+ nodeIds: ["localizedText"],
208
214
  },
209
215
  ];
210
- const defaultLang = "en";
211
- const targetLangs = ["ja", "en"];
216
+ export const translateBeat = async (index, context, targetLangs, args) => {
217
+ const { settings, callbacks } = args ?? {};
218
+ // Validate inputs
219
+ if (index < 0 || index >= context.studio.script.beats.length) {
220
+ throw new Error(`Invalid beat index: ${index}. Must be between 0 and ${context.studio.script.beats.length - 1}`);
221
+ }
222
+ if (!targetLangs || targetLangs.length === 0) {
223
+ throw new Error("targetLangs must be a non-empty array");
224
+ }
225
+ try {
226
+ const fileName = MulmoStudioContextMethods.getFileName(context);
227
+ const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
228
+ const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
229
+ mkdir(outDirPath);
230
+ const config = settings2GraphAIConfig(settings, process.env);
231
+ assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
232
+ const graph = new GraphAI(beatGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
233
+ graph.injectValue("context", context);
234
+ graph.injectValue("targetLangs", targetLangs);
235
+ graph.injectValue("beat", context.studio.script.beats[index]);
236
+ graph.injectValue("__mapIndex", index);
237
+ if (callbacks) {
238
+ callbacks.forEach((callback) => {
239
+ graph.registerCallback(callback);
240
+ });
241
+ }
242
+ const results = await graph.run();
243
+ const multiLingual = getMultiLingual(outputMultilingualFilePath, context.studio.beats.length);
244
+ multiLingual[index] = results.mergeMultiLingualData;
245
+ const data = {
246
+ version: currentMulmoScriptVersion,
247
+ multiLingual,
248
+ };
249
+ fs.writeFileSync(outputMultilingualFilePath, JSON.stringify(data, null, 2), "utf8");
250
+ writingMessage(outputMultilingualFilePath);
251
+ }
252
+ catch (error) {
253
+ GraphAILogger.log(error);
254
+ }
255
+ };
212
256
  export const translate = async (context, args) => {
213
257
  const { settings, callbacks } = args ?? {};
214
258
  try {
@@ -217,11 +261,11 @@ export const translate = async (context, args) => {
217
261
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
218
262
  const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
219
263
  mkdir(outDirPath);
264
+ const targetLangs = [...new Set([context.lang, context.studio.script.captionParams?.lang].filter((x) => !isNull(x)))];
220
265
  const config = settings2GraphAIConfig(settings, process.env);
221
266
  assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
222
267
  const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
223
268
  graph.injectValue("context", context);
224
- graph.injectValue("defaultLang", defaultLang);
225
269
  graph.injectValue("targetLangs", targetLangs);
226
270
  graph.injectValue("outDirPath", outDirPath);
227
271
  graph.injectValue("outputMultilingualFilePath", outputMultilingualFilePath);
@@ -239,4 +283,5 @@ export const translate = async (context, args) => {
239
283
  finally {
240
284
  MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
241
285
  }
286
+ return context;
242
287
  };
@@ -94,7 +94,7 @@ const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations,
94
94
  if (voiceStartAt) {
95
95
  const remainingDuration = movieDuration - voiceStartAt;
96
96
  const duration = remaining - remainingDuration;
97
- userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
97
+ userAssert(duration >= 0, `Invalid startAt: At index(${idx}), available duration(${duration}) < 0`);
98
98
  beatDurations.push(duration);
99
99
  subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
100
100
  userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
@@ -3,7 +3,7 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { provider2LipSyncAgent } from "../utils/provider2agent.js";
5
5
  export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) => {
6
- const { movieFile, audioFile } = namedInputs;
6
+ const { movieFile, audioFile, imageFile } = namedInputs;
7
7
  const apiKey = config?.apiKey;
8
8
  const model = params.model ?? provider2LipSyncAgent.replicate.defaultModel;
9
9
  if (!apiKey) {
@@ -12,10 +12,12 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
12
12
  const replicate = new Replicate({
13
13
  auth: apiKey,
14
14
  });
15
- const videoBuffer = readFileSync(movieFile);
15
+ const videoBuffer = movieFile ? readFileSync(movieFile) : undefined;
16
16
  const audioBuffer = readFileSync(audioFile);
17
- const videoUri = `data:video/quicktime;base64,${videoBuffer.toString("base64")}`;
17
+ const imageBuffer = imageFile ? readFileSync(imageFile) : undefined;
18
+ const videoUri = videoBuffer ? `data:video/quicktime;base64,${videoBuffer.toString("base64")}` : undefined;
18
19
  const audioUri = `data:audio/wav;base64,${audioBuffer.toString("base64")}`;
20
+ const imageUri = imageBuffer ? `data:image/png;base64,${imageBuffer.toString("base64")}` : undefined;
19
21
  const input = {
20
22
  video: undefined,
21
23
  video_input: undefined,
@@ -23,6 +25,7 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
23
25
  audio: undefined,
24
26
  audio_input: undefined,
25
27
  audio_file: undefined,
28
+ image: undefined,
26
29
  };
27
30
  const modelParams = provider2LipSyncAgent.replicate.modelParams[model];
28
31
  if (!modelParams) {
@@ -30,12 +33,16 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
30
33
  }
31
34
  const videoParam = modelParams.video;
32
35
  const audioParam = modelParams.audio;
36
+ const imageParam = modelParams.image;
33
37
  if (videoParam === "video" || videoParam === "video_input" || videoParam === "video_url") {
34
38
  input[videoParam] = videoUri;
35
39
  }
36
40
  if (audioParam === "audio" || audioParam === "audio_input" || audioParam === "audio_file") {
37
41
  input[audioParam] = audioUri;
38
42
  }
43
+ if (imageParam === "image") {
44
+ input[imageParam] = imageUri;
45
+ }
39
46
  const model_identifier = provider2LipSyncAgent.replicate.modelParams[model]?.identifier ?? model;
40
47
  try {
41
48
  const output = await replicate.run(model_identifier, {
@@ -27,7 +27,7 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
27
27
  try {
28
28
  const voiceRes = await fetch(url, options);
29
29
  const voiceJson = await voiceRes.json();
30
- if (voiceJson && voiceJson.generatedVoice && voiceJson.generatedVoice.audioFileDownloadUrl) {
30
+ if (voiceJson?.generatedVoice?.audioFileDownloadUrl) {
31
31
  const audioRes = await fetch(voiceJson.generatedVoice.audioFileDownloadUrl);
32
32
  const buffer = Buffer.from(await audioRes.arrayBuffer());
33
33
  return { buffer };
@@ -5,6 +5,6 @@ export const handler = async (argv) => {
5
5
  if (!context) {
6
6
  process.exit(1);
7
7
  }
8
- await runTranslateIfNeeded(context, argv);
8
+ await runTranslateIfNeeded(context);
9
9
  await audio(context);
10
10
  };