mulmocast 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +39 -2
  2. package/assets/templates/business.json +2 -181
  3. package/assets/templates/children_book.json +1 -128
  4. package/assets/templates/coding.json +2 -136
  5. package/assets/templates/comic_strips.json +1 -30
  6. package/assets/templates/ghibli_strips.json +1 -30
  7. package/assets/templates/sensei_and_taro.json +1 -118
  8. package/lib/actions/audio.js +41 -31
  9. package/lib/actions/captions.js +39 -26
  10. package/lib/actions/images.js +31 -11
  11. package/lib/actions/movie.js +30 -21
  12. package/lib/actions/pdf.js +11 -1
  13. package/lib/actions/translate.js +33 -18
  14. package/lib/agents/combine_audio_files_agent.js +19 -8
  15. package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
  16. package/lib/cli/commands/tool/scripting/builder.js +4 -4
  17. package/lib/cli/commands/tool/scripting/handler.d.ts +2 -1
  18. package/lib/cli/commands/tool/scripting/handler.js +3 -3
  19. package/lib/cli/commands/tool/story_to_script/builder.d.ts +3 -1
  20. package/lib/cli/commands/tool/story_to_script/builder.js +12 -4
  21. package/lib/cli/commands/tool/story_to_script/handler.d.ts +3 -1
  22. package/lib/cli/commands/tool/story_to_script/handler.js +4 -3
  23. package/lib/methods/mulmo_script_template.d.ts +2 -2
  24. package/lib/methods/mulmo_script_template.js +2 -2
  25. package/lib/methods/mulmo_studio.d.ts +8 -0
  26. package/lib/methods/mulmo_studio.js +24 -0
  27. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
  28. package/lib/tools/create_mulmo_script_from_url.js +7 -7
  29. package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
  30. package/lib/tools/create_mulmo_script_interactively.js +8 -8
  31. package/lib/tools/story_to_script.d.ts +5 -3
  32. package/lib/tools/story_to_script.js +90 -16
  33. package/lib/types/schema.d.ts +320 -1766
  34. package/lib/types/schema.js +41 -2
  35. package/lib/types/type.d.ts +4 -2
  36. package/lib/utils/const.d.ts +4 -0
  37. package/lib/utils/const.js +4 -0
  38. package/lib/utils/file.d.ts +1 -0
  39. package/lib/utils/file.js +16 -4
  40. package/lib/utils/filters.js +16 -11
  41. package/lib/utils/markdown.js +0 -2
  42. package/lib/utils/preprocess.d.ts +34 -15
  43. package/lib/utils/preprocess.js +3 -2
  44. package/lib/utils/prompt.d.ts +2 -1
  45. package/lib/utils/prompt.js +20 -3
  46. package/lib/utils/utils.d.ts +8 -5
  47. package/lib/utils/utils.js +27 -17
  48. package/package.json +2 -2
package/assets/templates/sensei_and_taro.json
@@ -2,122 +2,5 @@
   "title": "Student and Teacher",
   "description": "Interactive discussion between a student and teacher",
   "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
- "script": {
-   "$mulmocast": {
-     "version": "1.0",
-     "credit": "closing"
-   },
-   "title": "韓国の戒厳令とその日本への影響",
-   "description": "韓国で最近発令された戒厳令とその可能性のある影響について、また日本の憲法に関する考慮事項との類似点を含めた洞察に満ちた議論。",
-   "lang": "ja",
-   "speechParams": {
-     "provider": "nijivoice",
-     "speakers": {
-       "Announcer": {
-         "displayName": {
-           "ja": "アナウンサー"
-         },
-         "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62"
-       },
-       "Student": {
-         "displayName": {
-           "ja": "生徒"
-         },
-         "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
-       },
-       "Teacher": {
-         "displayName": {
-           "ja": "先生"
-         },
-         "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
-       }
-     }
-   },
-   "beats": [
-     {
-       "speaker": "Announcer",
-       "text": "今日は、韓国で起きた戒厳令について、太郎くんが先生に聞きます。"
-     },
-     {
-       "speaker": "Student",
-       "text": "先生、今日は韓国で起きた戒厳令のことを教えてもらえますか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "もちろんだよ、太郎くん。韓国で最近、大統領が「戒厳令」っていうのを突然宣言したんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "戒厳令ってなんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "簡単に言うと、国がすごく危ない状態にあるとき、軍隊を使って人々の自由を制限するためのものなんだ。たとえば、政治活動を禁止したり、人の集まりを取り締まったりするんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "それって怖いですね。なんでそんなことをしたんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "大統領は「国会がうまく機能していないから」と言っていたけど、実際には自分の立場を守るために使ったように見えるんだ。それで、軍隊が国会に突入して、議員たちを捕まえようとしたんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "ええっ!?国会議員を捕まえようとするなんて、すごく危ないことじゃないですか。"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "その通りだよ。もし軍隊が国会を占拠していたら、国会で戒厳令を解除することもできなかったかもしれない。つまり、大統領がずっと自分の好きなように国を支配できるようになってしまうんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "韓国ではどうなったんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "幸い、野党の議員や市民たちが急いで集まって抗議して、6時間後に戒厳令は解除されたんだ。でも、ほんの少しの違いで、韓国の民主主義が大きく傷つけられるところだったんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "それは大変なことですね…。日本ではそんなこと起きないんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "実はね、今、日本でも似たような話があるんだよ。自民党が「緊急事態宣言」を憲法に追加しようとしているんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "緊急事態宣言って、韓国の戒厳令と同じようなものなんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "似ている部分があるね。たとえば、総理大臣が「社会秩序の混乱の危険があるから」と言えば、特別な権限を使って国を動かすことができるんだ。法律と同じ力を持つ命令を出したり、地方自治体に指示を出したりすることができるんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "それって便利そうですけど、なんだか心配です。"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "そうだね。もちろん、緊急時には素早い対応が必要だから便利な面もあるけど、その権限が濫用されると、とても危険なんだ。たとえば、総理大臣が自分に都合のいいように国を動かしたり、国民の自由を奪ったりすることができるようになってしまうかもしれない。"
-     },
-     {
-       "speaker": "Student",
-       "text": "韓国みたいに、軍隊が政治に口を出してくることもあり得るんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "完全にあり得ないとは言えないからこそ、注意が必要なんだ。私たち国民は、自民党の改憲案が権力の濫用を防ぐための適切な制限を含んでいるのかをしっかり監視し、声を上げることが求められる。民主主義が損なわれるのを防ぐために、私たち一人ひとりが積極的に関心を持つことが大切なんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "ありがとうございます。とても良い勉強になりました。"
-     },
-     {
-       "speaker": "Announcer",
-       "text": "ご視聴、ありがとうございました。次回の放送もお楽しみに。"
-     }
-   ]
- }
+ "scriptName": "sensei_and_taro.json"
  }
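
Net effect: the bundled template no longer inlines a full sample script; it points at a standalone script file via the new scriptName field. The resulting template (systemPrompt value unchanged, elided here) is just:

{
  "title": "Student and Teacher",
  "description": "Interactive discussion between a student and teacher",
  "systemPrompt": "この件について、… (unchanged, elided)",
  "scriptName": "sensei_and_taro.json"
}

Judging by the files-changed list (+1/+2 against -30 to -181 lines each), the other templates (business.json, children_book.json, coding.json, comic_strips.json, ghibli_strips.json) appear to shrink the same way.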
package/lib/actions/audio.js
@@ -11,6 +11,7 @@ import { MulmoScriptMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const { default: __, ...vanillaAgents } = agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -73,11 +74,14 @@ const graph_tts = {
          text: ":preprocessor.text",
          file: ":preprocessor.audioPath",
          force: ":context.force",
-       },
-       params: {
-         voice: ":preprocessor.voiceId",
-         speed: ":preprocessor.speechOptions.speed",
-         instructions: ":preprocessor.speechOptions.instruction",
+         studio: ":context.studio", // for cache
+         index: ":__mapIndex", // for cache
+         sessionType: "audio", // for cache
+         params: {
+           voice: ":preprocessor.voiceId",
+           speed: ":preprocessor.speechOptions.speed",
+           instructions: ":preprocessor.speechOptions.instruction",
+         },
        },
      },
    },
@@ -157,30 +161,36 @@ const agentFilters = [
    },
  ];
  export const audio = async (context) => {
-   const { studio, fileDirs, lang } = context;
-   const { outDirPath, audioDirPath } = fileDirs;
-   const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
-   const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
-   const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
-   const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
-   mkdir(outDirPath);
-   mkdir(audioSegmentDirPath);
-   graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
-   const graph = new GraphAI(graph_data, {
-     ...vanillaAgents,
-     fileWriteAgent,
-     ttsOpenaiAgent,
-     ttsNijivoiceAgent,
-     ttsGoogleAgent,
-     addBGMAgent,
-     combineAudioFilesAgent,
-   }, { agentFilters });
-   graph.injectValue("context", context);
-   graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
-   graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
-   graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-   graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
-   graph.injectValue("audioDirPath", audioDirPath);
-   await graph.run();
-   writingMessage(audioCombinedFilePath);
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+     const { studio, fileDirs, lang } = context;
+     const { outDirPath, audioDirPath } = fileDirs;
+     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
+     const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
+     const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
+     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+     mkdir(outDirPath);
+     mkdir(audioSegmentDirPath);
+     graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+     const graph = new GraphAI(graph_data, {
+       ...vanillaAgents,
+       fileWriteAgent,
+       ttsOpenaiAgent,
+       ttsNijivoiceAgent,
+       ttsGoogleAgent,
+       addBGMAgent,
+       combineAudioFilesAgent,
+     }, { agentFilters });
+     graph.injectValue("context", context);
+     graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
+     graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
+     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+     graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
+     graph.injectValue("audioDirPath", audioDirPath);
+     await graph.run();
+     writingMessage(audioCombinedFilePath);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+   }
  };
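
Every action in this release gains the same try/finally bookkeeping around MulmoStudioMethods.setSessionState and setBeatSessionState (the same pattern recurs in captions, images, movie, pdf, and translate below). The new lib/methods/mulmo_studio.js (+24 lines) is not shown in this diff; a minimal sketch of what these helpers plausibly do, assuming the state is kept as flags on the studio object:

// Hypothetical sketch -- the real lib/methods/mulmo_studio.js may differ.
export const MulmoStudioMethods = {
  // Marks a whole session (audio, image, video, pdf, multiLingual, caption) as running.
  setSessionState(studio, sessionType, value) {
    studio.state = studio.state ?? { inSession: {}, inBeatSession: {} };
    studio.state.inSession[sessionType] = value;
  },
  // Marks one beat within a session as running, e.g. image generation for beat 3.
  setBeatSessionState(studio, sessionType, index, value) {
    studio.state = studio.state ?? { inSession: {}, inBeatSession: {} };
    const flags = (studio.state.inBeatSession[sessionType] ??= {});
    flags[index] = value;
  },
};

The try/finally wrapping guarantees the flag is cleared even when the underlying graph run throws.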
package/lib/actions/captions.js
@@ -2,6 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const { default: __, ...vanillaAgents } = agents;
  const graph_data = {
    version: 0.5,
@@ -17,31 +18,37 @@ const graph_data = {
    },
    graph: {
      nodes: {
-       test: {
+       generateCaption: {
          agent: async (namedInputs) => {
            const { beat, context, index } = namedInputs;
-           const { fileDirs } = namedInputs.context;
-           const { caption } = context;
-           const { imageDirPath } = fileDirs;
-           const { canvasSize } = context.studio.script;
-           const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
-           const template = getHTMLFile("caption");
-           const text = (() => {
-             const multiLingual = context.studio.multiLingual;
-             if (caption && multiLingual) {
-               return multiLingual[index].multiLingualTexts[caption].text;
-             }
-             GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
-             return beat.text;
-           })();
-           const htmlData = interpolate(template, {
-             caption: text,
-             width: `${canvasSize.width}`,
-             height: `${canvasSize.height}`,
-           });
-           await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
-           context.studio.beats[index].captionFile = imagePath;
-           return imagePath;
+           try {
+             MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+             const { fileDirs } = namedInputs.context;
+             const { caption } = context;
+             const { imageDirPath } = fileDirs;
+             const { canvasSize } = context.studio.script;
+             const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
+             const template = getHTMLFile("caption");
+             const text = (() => {
+               const multiLingual = context.studio.multiLingual;
+               if (caption && multiLingual) {
+                 return multiLingual[index].multiLingualTexts[caption].text;
+               }
+               GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
+               return beat.text;
+             })();
+             const htmlData = interpolate(template, {
+               caption: text,
+               width: `${canvasSize.width}`,
+               height: `${canvasSize.height}`,
+             });
+             await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
+             context.studio.beats[index].captionFile = imagePath;
+             return imagePath;
+           }
+           finally {
+             MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+           }
          },
          inputs: {
            beat: ":beat",
@@ -56,7 +63,13 @@ const graph_data = {
    },
  };
  export const captions = async (context) => {
-   const graph = new GraphAI(graph_data, { ...vanillaAgents });
-   graph.injectValue("context", context);
-   await graph.run();
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+     const graph = new GraphAI(graph_data, { ...vanillaAgents });
+     graph.injectValue("context", context);
+     await graph.run();
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+   }
  };
package/lib/actions/images.js
@@ -13,6 +13,7 @@ const { default: __, ...vanillaAgents } = agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const htmlStyle = (script, beat) => {
    return {
      canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -30,10 +31,16 @@ const imagePreprocessAgent = async (namedInputs) => {
    if (beat.image) {
      const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
      if (plugin) {
-       const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
-       const path = await plugin.process(processorParams);
-       // undefined prompt indicates that image generation is not needed
-       return { path, ...returnValue };
+       try {
+         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+         const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
+         const path = await plugin.process(processorParams);
+         // undefined prompt indicates that image generation is not needed
+         return { path, ...returnValue };
+       }
+       finally {
+         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+       }
      }
    }
    const prompt = imagePrompt(beat, imageParams.style);
@@ -71,17 +78,21 @@ const graph_data = {
      imageGenerator: {
        if: ":preprocessor.prompt",
        agent: ":imageAgentInfo.agent",
-       params: {
-         model: ":preprocessor.imageParams.model",
-         size: ":preprocessor.imageParams.size",
-         moderation: ":preprocessor.imageParams.moderation",
-         aspectRatio: ":preprocessor.aspectRatio",
-       },
+       retry: 3,
        inputs: {
          prompt: ":preprocessor.prompt",
          file: ":preprocessor.path", // only for fileCacheAgentFilter
          text: ":preprocessor.prompt", // only for fileCacheAgentFilter
          force: ":context.force",
+         studio: ":context.studio", // for cache
+         index: ":__mapIndex", // for cache
+         sessionType: "image", // for cache
+         params: {
+           model: ":preprocessor.imageParams.model",
+           size: ":preprocessor.imageParams.size",
+           moderation: ":preprocessor.imageParams.moderation",
+           aspectRatio: ":preprocessor.aspectRatio",
+         },
        },
        defaultValue: {},
      },
@@ -133,7 +144,7 @@ const googleAuth = async () => {
    const accessToken = await client.getAccessToken();
    return accessToken.token;
  };
- export const images = async (context) => {
+ const generateImages = async (context) => {
    const { studio, fileDirs } = context;
    const { outDirPath, imageDirPath } = fileDirs;
    mkdir(`${imageDirPath}/${studio.filename}`);
@@ -172,3 +183,12 @@ export const images = async (context) => {
    });
    await graph.run();
  };
+ export const images = async (context) => {
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "image", true);
+     await generateImages(context);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "image", false);
+   }
+ };
package/lib/actions/movie.js
@@ -2,27 +2,28 @@ import { GraphAILogger } from "graphai";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
    const videoId = `v${inputIndex}`;
+   const videoFilters = [];
+   // Handle different media types
+   if (mediaType === "image") {
+     videoFilters.push("loop=loop=-1:size=1:start=0");
+   }
+   else if (mediaType === "movie") {
+     // For videos, extend with last frame if shorter than required duration
+     // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
+     videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
+   }
+   // Common filters for all media types
+   videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+   // In case of the aspect ratio mismatch, we fill the extra space with black color.
+   `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
    return {
      videoId,
-     videoPart: `[${inputIndex}:v]` +
-       [
-         mediaType === "image" ? "loop=loop=-1:size=1:start=0" : "",
-         `trim=duration=${duration}`,
-         "fps=30",
-         "setpts=PTS-STARTPTS",
-         `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
-         // In case of the aspect ratio mismatch, we fill the extra space with black color.
-         `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
-         "setsar=1",
-         "format=yuv420p",
-       ]
-         .filter((a) => a)
-         .join(",") +
-       `[${videoId}]`,
+     videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
    };
  };
  export const getAudioPart = (inputIndex, duration, delay) => {
@@ -121,12 +122,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
    await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
    const end = performance.now();
    GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
+   GraphAILogger.info(studio.script.title);
+   GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
  };
  export const movie = async (context) => {
-   const { studio, fileDirs, caption } = context;
-   const { outDirPath } = fileDirs;
-   const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
-   const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
-   await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
-   writingMessage(outputVideoPath);
+   MulmoStudioMethods.setSessionState(context.studio, "video", true);
+   try {
+     const { studio, fileDirs, caption } = context;
+     const { outDirPath } = fileDirs;
+     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
+     const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
+     await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
+     writingMessage(outputVideoPath);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "video", false);
+   }
  };
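
For illustration, with inputIndex 0, a 6-second beat, and a 1280x720 canvas (example values), the rewritten getVideoPart emits a chain like the following for a movie source; tpad first clones the last frame out to 12 s (2x the beat duration), and trim then cuts back to exactly 6 s, so sources shorter than the beat freeze on their last frame instead of ending early:

[0:v]tpad=stop_mode=clone:stop_duration=12,trim=duration=6,fps=30,setpts=PTS-STARTPTS,scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,setsar=1,format=yuv420p[v0]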
package/lib/actions/pdf.js
@@ -6,6 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const imagesPerPage = 4;
  const offset = 10;
  const handoutImageRatio = 0.5;
@@ -189,7 +190,7 @@ const outputSize = (pdfSize, isLandscapeImage, isRotate) => {
    }
    return { width: 612, height: 792 };
  };
- export const pdf = async (context, pdfMode, pdfSize) => {
+ const generatePdf = async (context, pdfMode, pdfSize) => {
    const { studio, fileDirs, lang } = context;
    const { multiLingual } = studio;
    const { outDirPath } = fileDirs;
@@ -219,3 +220,12 @@ export const pdf = async (context, pdfMode, pdfSize) => {
    fs.writeFileSync(outputPdfPath, pdfBytes);
    writingMessage(outputPdfPath);
  };
+ export const pdf = async (context, pdfMode, pdfSize) => {
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "pdf", true);
+     await generatePdf(context, pdfMode, pdfSize);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "pdf", false);
+   }
+ };
package/lib/actions/translate.js
@@ -6,6 +6,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const { default: __, ...vanillaAgents } = agents;
  const translateGraph = {
    version: 0.5,
@@ -62,6 +63,7 @@ const translateGraph = {
      rows: ":targetLangs",
      lang: ":lang.text",
      studio: ":studio",
+     beatIndex: ":__mapIndex",
    },
    params: {
      compositeResult: true,
@@ -76,6 +78,8 @@ const translateGraph = {
      beat: ":beat", // for cache
      multiLingual: ":multiLingual", // for cache
      lang: ":lang", // for cache
+     beatIndex: ":beatIndex", // for cache
+     studio: ":studio", // for cache
      system: translateSystemPrompt,
      prompt: translatePrompts,
    },
@@ -171,7 +175,7 @@ const translateGraph = {
  };
  const localizedTextCacheAgentFilter = async (context, next) => {
    const { namedInputs } = context;
-   const { targetLang, beat, lang, multiLingual } = namedInputs;
+   const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
    if (!beat.text) {
      return { text: "" };
    }
@@ -187,7 +191,13 @@ const localizedTextCacheAgentFilter = async (context, next) => {
    if (targetLang === lang) {
      return { text: beat.text };
    }
-   return await next(context);
+   try {
+     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
+     return await next(context);
+   }
+   finally {
+     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
+   }
  };
  const agentFilters = [
    {
@@ -199,21 +209,26 @@ const agentFilters = [
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
  export const translate = async (context) => {
-   const { studio, fileDirs } = context;
-   const { outDirPath } = fileDirs;
-   const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
-   mkdir(outDirPath);
-   assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
-   const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
-   graph.injectValue("studio", studio);
-   graph.injectValue("defaultLang", defaultLang);
-   graph.injectValue("targetLangs", targetLangs);
-   graph.injectValue("outDirPath", outDirPath);
-   graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-   const results = await graph.run();
-   writingMessage(outputStudioFilePath);
-   if (results.mergeStudioResult) {
-     context.studio = results.mergeStudioResult;
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
+     const { studio, fileDirs } = context;
+     const { outDirPath } = fileDirs;
+     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+     mkdir(outDirPath);
+     assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
+     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
+     graph.injectValue("studio", studio);
+     graph.injectValue("defaultLang", defaultLang);
+     graph.injectValue("targetLangs", targetLangs);
+     graph.injectValue("outDirPath", outDirPath);
+     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+     const results = await graph.run();
+     writingMessage(outputStudioFilePath);
+     if (results.mergeStudioResult) {
+       context.studio = results.mergeStudioResult;
+     }
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
    }
-   // console.log(JSON.stringify(results, null, 2));
  };
package/lib/agents/combine_audio_files_agent.js
@@ -9,19 +9,34 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const silentIds = context.studio.beats.map((_, index) => `[ls_${index}]`);
    ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
    const inputIds = (await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
+     const beat = context.studio.script.beats[index];
      const isClosingGap = index === context.studio.beats.length - 2;
      if (studioBeat.audioFile) {
        const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
        const padding = (() => {
+         if (beat.audioParams?.padding !== undefined) {
+           return beat.audioParams.padding;
+         }
          if (index === context.studio.beats.length - 1) {
            return 0;
          }
          return isClosingGap ? context.studio.script.audioParams.closingPadding : context.studio.script.audioParams.padding;
        })();
-       studioBeat.duration = (await ffmpegGetMediaDuration(studioBeat.audioFile)) + padding;
-       if (padding > 0) {
+       const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
+       const totalPadding = await (async () => {
+         if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
+           const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+           const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
+           if (movieDuration > audioDuration) {
+             return padding + (movieDuration - audioDuration);
+           }
+         }
+         return padding;
+       })();
+       studioBeat.duration = audioDuration + totalPadding;
+       if (totalPadding > 0) {
          const silentId = silentIds.pop();
-         ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${padding}[padding_${index}]`);
+         ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${totalPadding}[padding_${index}]`);
          return [audioId, `[padding_${index}]`];
        }
        else {
@@ -30,16 +45,12 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
        }
      }
      else {
        // NOTE: We come here when the text is empty and no audio property is specified.
-       studioBeat.duration = context.studio.script.beats[index].duration ?? 1.0;
+       studioBeat.duration = beat.duration ?? 1.0;
        const silentId = silentIds.pop();
        ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${studioBeat.duration}[silent_${index}]`);
        return [`[silent_${index}]`];
      }
    }))).flat();
-   // HACK: Because the last beat may not use an silent audio, we need to consume it to make ffmpeg happy.
-   if (silentIds.length > 1) {
-     throw new Error("UNEXPECTED: silentIds.length > 1");
-   }
    silentIds.forEach((silentId) => {
      GraphAILogger.log(`Using extra silentId: ${silentId}`);
      ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${0.01}[silent_extra]`);
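
A worked example of the new duration logic, with illustrative numbers: a beat with a 3.0 s narration, a movie image source of 5.5 s, and a script-level audioParams.padding of 0.3 s gets totalPadding = 0.3 + (5.5 - 3.0) = 2.8 s, so studioBeat.duration = 3.0 + 2.8 = 5.8 s and the audio track is padded with silence until the movie finishes. A beat-level beat.audioParams.padding, when present, overrides the script-level value before this movie-length adjustment.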
package/lib/cli/commands/tool/scripting/builder.d.ts
@@ -14,7 +14,7 @@ export declare const builder: (yargs: Argv) => Argv<{
  } & {
    s: string;
  } & {
-   llm_agent: string | undefined;
+   llm: "openAI" | "anthropic" | "gemini" | "groq" | undefined;
  } & {
    llm_model: string | undefined;
  }>;
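
The scripting and story_to_script commands therefore now constrain the LLM back end to a fixed choice list ("openAI" | "anthropic" | "gemini" | "groq") instead of the old free-form llm_agent string. A hypothetical invocation, assuming the binary name and flag wiring follow the command paths above (this diff only shows the type declarations, not the full CLI setup):

mulmocast tool story_to_script -s story.json --llm openAI --llm_model gpt-4o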