mulmocast 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/README.md +257 -39
  2. package/assets/audio/silent60sec.mp3 +0 -0
  3. package/assets/html/caption.html +45 -0
  4. package/assets/html/chart.html +1 -1
  5. package/assets/html/mermaid.html +6 -2
  6. package/assets/html/tailwind.html +13 -0
  7. package/assets/templates/business.json +57 -4
  8. package/assets/templates/comic_strips.json +35 -0
  9. package/assets/templates/ghibli_strips.json +35 -0
  10. package/lib/actions/audio.js +24 -11
  11. package/lib/actions/captions.d.ts +2 -0
  12. package/lib/actions/captions.js +62 -0
  13. package/lib/actions/images.js +3 -2
  14. package/lib/actions/index.d.ts +1 -0
  15. package/lib/actions/index.js +1 -0
  16. package/lib/actions/movie.js +78 -86
  17. package/lib/actions/pdf.js +15 -5
  18. package/lib/actions/translate.js +32 -26
  19. package/lib/agents/add_bgm_agent.js +15 -39
  20. package/lib/agents/combine_audio_files_agent.js +43 -36
  21. package/lib/agents/index.d.ts +2 -3
  22. package/lib/agents/index.js +2 -3
  23. package/lib/agents/tts_google_agent.d.ts +4 -0
  24. package/lib/agents/tts_google_agent.js +51 -0
  25. package/lib/agents/validate_schema_agent.d.ts +19 -0
  26. package/lib/agents/validate_schema_agent.js +36 -0
  27. package/lib/cli/args.d.ts +2 -0
  28. package/lib/cli/args.js +9 -2
  29. package/lib/cli/bin.d.ts +3 -0
  30. package/lib/cli/bin.js +38 -0
  31. package/lib/cli/cli.js +34 -7
  32. package/lib/cli/commands/audio/builder.d.ts +14 -0
  33. package/lib/cli/commands/audio/builder.js +6 -0
  34. package/lib/cli/commands/audio/handler.d.ts +4 -0
  35. package/lib/cli/commands/audio/handler.js +7 -0
  36. package/lib/cli/commands/audio/index.d.ts +4 -0
  37. package/lib/cli/commands/audio/index.js +4 -0
  38. package/lib/cli/commands/image/builder.d.ts +14 -0
  39. package/lib/cli/commands/image/builder.js +6 -0
  40. package/lib/cli/commands/image/handler.d.ts +4 -0
  41. package/lib/cli/commands/image/handler.js +7 -0
  42. package/lib/cli/commands/image/index.d.ts +4 -0
  43. package/lib/cli/commands/image/index.js +4 -0
  44. package/lib/cli/commands/movie/builder.d.ts +18 -0
  45. package/lib/cli/commands/movie/builder.js +19 -0
  46. package/lib/cli/commands/movie/handler.d.ts +6 -0
  47. package/lib/cli/commands/movie/handler.js +12 -0
  48. package/lib/cli/commands/movie/index.d.ts +4 -0
  49. package/lib/cli/commands/movie/index.js +4 -0
  50. package/lib/cli/commands/pdf/builder.d.ts +18 -0
  51. package/lib/cli/commands/pdf/builder.js +19 -0
  52. package/lib/cli/commands/pdf/handler.d.ts +6 -0
  53. package/lib/cli/commands/pdf/handler.js +8 -0
  54. package/lib/cli/commands/pdf/index.d.ts +4 -0
  55. package/lib/cli/commands/pdf/index.js +4 -0
  56. package/lib/cli/commands/tool/index.d.ts +6 -0
  57. package/lib/cli/commands/tool/index.js +8 -0
  58. package/lib/cli/commands/tool/prompt/builder.d.ts +4 -0
  59. package/lib/cli/commands/tool/prompt/builder.js +11 -0
  60. package/lib/cli/commands/tool/prompt/handler.d.ts +4 -0
  61. package/lib/cli/commands/tool/prompt/handler.js +14 -0
  62. package/lib/cli/commands/tool/prompt/index.d.ts +4 -0
  63. package/lib/cli/commands/tool/prompt/index.js +4 -0
  64. package/lib/cli/commands/tool/schema/builder.d.ts +2 -0
  65. package/lib/cli/commands/tool/schema/builder.js +3 -0
  66. package/lib/cli/commands/tool/schema/handler.d.ts +2 -0
  67. package/lib/cli/commands/tool/schema/handler.js +12 -0
  68. package/lib/cli/commands/tool/schema/index.d.ts +4 -0
  69. package/lib/cli/commands/tool/schema/index.js +4 -0
  70. package/lib/cli/commands/tool/scripting/builder.d.ts +20 -0
  71. package/lib/cli/commands/tool/scripting/builder.js +63 -0
  72. package/lib/cli/commands/tool/scripting/handler.d.ts +12 -0
  73. package/lib/cli/commands/tool/scripting/handler.js +36 -0
  74. package/lib/cli/commands/tool/scripting/index.d.ts +4 -0
  75. package/lib/cli/commands/tool/scripting/index.js +4 -0
  76. package/lib/cli/commands/tool/story_to_script/builder.d.ts +18 -0
  77. package/lib/cli/commands/tool/story_to_script/builder.js +53 -0
  78. package/lib/cli/commands/tool/story_to_script/handler.d.ts +11 -0
  79. package/lib/cli/commands/tool/story_to_script/handler.js +35 -0
  80. package/lib/cli/commands/tool/story_to_script/index.d.ts +4 -0
  81. package/lib/cli/commands/tool/story_to_script/index.js +4 -0
  82. package/lib/cli/commands/translate/builder.d.ts +14 -0
  83. package/lib/cli/commands/translate/builder.js +5 -0
  84. package/lib/cli/commands/translate/handler.d.ts +4 -0
  85. package/lib/cli/commands/translate/handler.js +6 -0
  86. package/lib/cli/commands/translate/index.d.ts +4 -0
  87. package/lib/cli/commands/translate/index.js +4 -0
  88. package/lib/cli/common.d.ts +6 -2
  89. package/lib/cli/common.js +18 -7
  90. package/lib/cli/helpers.d.ts +38 -0
  91. package/lib/cli/helpers.js +115 -0
  92. package/lib/cli/tool-args.d.ts +1 -0
  93. package/lib/cli/tool-args.js +1 -1
  94. package/lib/cli/tool-cli.js +8 -0
  95. package/lib/methods/mulmo_script.d.ts +0 -1
  96. package/lib/methods/mulmo_script.js +4 -7
  97. package/lib/methods/mulmo_script_template.js +2 -12
  98. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
  99. package/lib/tools/create_mulmo_script_from_url.js +43 -14
  100. package/lib/tools/create_mulmo_script_interactively.js +14 -13
  101. package/lib/tools/dump_prompt.js +2 -0
  102. package/lib/tools/story_to_script.d.ts +10 -0
  103. package/lib/tools/story_to_script.js +201 -0
  104. package/lib/types/cli_types.d.ts +14 -0
  105. package/lib/types/cli_types.js +1 -0
  106. package/lib/types/schema.d.ts +493 -176
  107. package/lib/types/schema.js +37 -7
  108. package/lib/types/type.d.ts +6 -1
  109. package/lib/utils/const.d.ts +1 -0
  110. package/lib/utils/const.js +1 -0
  111. package/lib/utils/ffmpeg_utils.d.ts +12 -0
  112. package/lib/utils/ffmpeg_utils.js +63 -0
  113. package/lib/utils/file.d.ts +7 -3
  114. package/lib/utils/file.js +24 -5
  115. package/lib/utils/image_plugins/chart.js +6 -1
  116. package/lib/utils/image_plugins/html_tailwind.d.ts +3 -0
  117. package/lib/utils/image_plugins/html_tailwind.js +18 -0
  118. package/lib/utils/image_plugins/index.d.ts +2 -1
  119. package/lib/utils/image_plugins/index.js +2 -1
  120. package/lib/utils/image_plugins/mermaid.js +1 -1
  121. package/lib/utils/image_plugins/tailwind.d.ts +3 -0
  122. package/lib/utils/image_plugins/tailwind.js +18 -0
  123. package/lib/utils/image_plugins/text_slide.js +9 -2
  124. package/lib/utils/markdown.d.ts +1 -1
  125. package/lib/utils/markdown.js +8 -2
  126. package/lib/utils/preprocess.d.ts +23 -12
  127. package/lib/utils/preprocess.js +4 -0
  128. package/lib/utils/prompt.d.ts +15 -0
  129. package/lib/utils/prompt.js +57 -0
  130. package/lib/utils/utils.d.ts +2 -0
  131. package/lib/utils/utils.js +10 -0
  132. package/package.json +27 -23
@@ -5,17 +5,19 @@ import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
5
5
  import addBGMAgent from "../agents/add_bgm_agent.js";
6
6
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
7
7
  import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
8
+ import ttsGoogleAgent from "../agents/tts_google_agent.js";
8
9
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
9
10
  import { MulmoScriptMethods } from "../methods/index.js";
10
11
  import { fileCacheAgentFilter } from "../utils/filters.js";
11
12
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
12
- import { text2hash } from "../utils/utils.js";
13
+ import { text2hash, localizedText } from "../utils/utils.js";
13
14
  const { default: __, ...vanillaAgents } = agents;
14
15
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
15
16
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
16
17
  const provider_to_agent = {
17
18
  nijivoice: "ttsNijivoiceAgent",
18
19
  openai: "ttsOpenaiAgent",
20
+ google: "ttsGoogleAgent",
19
21
  };
20
22
  const getAudioPath = (context, beat, audioFile, audioDirPath) => {
21
23
  if (beat.audio?.type === "audio") {
@@ -25,23 +27,30 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
25
27
  }
26
28
  throw new Error("Invalid audio source");
27
29
  }
30
+ if (beat.text === "") {
31
+ return undefined; // It indicates that the audio is not needed.
32
+ }
28
33
  return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
29
34
  };
30
35
  const preprocessor = (namedInputs) => {
31
- const { beat, index, context, audioDirPath } = namedInputs;
32
- const studioBeat = context.studio.beats[index];
36
+ const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
37
+ const { lang } = context;
33
38
  const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
34
39
  const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
35
- const hash_string = `${beat.text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
36
- const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}`;
40
+ const text = localizedText(beat, multiLingual, lang);
41
+ const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
42
+ const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
37
43
  const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
38
44
  studioBeat.audioFile = audioPath;
45
+ const needsTTS = !beat.audio && audioPath !== undefined;
39
46
  return {
40
47
  ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
41
48
  studioBeat,
42
49
  voiceId,
43
50
  speechOptions,
44
51
  audioPath,
52
+ text,
53
+ needsTTS,
45
54
  };
46
55
  };
47
56
  const graph_tts = {
@@ -50,16 +59,18 @@ const graph_tts = {
50
59
  agent: preprocessor,
51
60
  inputs: {
52
61
  beat: ":beat",
62
+ studioBeat: ":studioBeat",
63
+ multiLingual: ":multiLingual",
53
64
  index: ":__mapIndex",
54
65
  context: ":context",
55
66
  audioDirPath: ":audioDirPath",
56
67
  },
57
68
  },
58
69
  tts: {
59
- unless: ":beat.audio",
70
+ if: ":preprocessor.needsTTS",
60
71
  agent: ":preprocessor.ttsAgent",
61
72
  inputs: {
62
- text: ":beat.text",
73
+ text: ":preprocessor.text",
63
74
  file: ":preprocessor.audioPath",
64
75
  force: ":context.force",
65
76
  },
@@ -85,13 +96,15 @@ const graph_data = {
85
96
  agent: "mapAgent",
86
97
  inputs: {
87
98
  rows: ":context.studio.script.beats",
88
- studio: ":context.studio",
99
+ studioBeat: ":context.studio.beats",
100
+ multiLingual: ":context.studio.multiLingual",
89
101
  audioDirPath: ":audioDirPath",
90
102
  audioSegmentDirPath: ":audioSegmentDirPath",
91
103
  context: ":context",
92
104
  },
93
105
  params: {
94
106
  rowKey: "beat",
107
+ expandKeys: ["studioBeat", "multiLingual"],
95
108
  },
96
109
  graph: graph_tts,
97
110
  },
@@ -101,7 +114,6 @@ const graph_data = {
101
114
  map: ":map",
102
115
  context: ":context",
103
116
  combinedFileName: ":audioCombinedFilePath",
104
- audioDirPath: ":audioDirPath",
105
117
  },
106
118
  isResult: true,
107
119
  },
@@ -145,11 +157,11 @@ const agentFilters = [
145
157
  },
146
158
  ];
147
159
  export const audio = async (context) => {
148
- const { studio, fileDirs } = context;
160
+ const { studio, fileDirs, lang } = context;
149
161
  const { outDirPath, audioDirPath } = fileDirs;
150
162
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
151
163
  const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
152
- const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename);
164
+ const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
153
165
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
154
166
  mkdir(outDirPath);
155
167
  mkdir(audioSegmentDirPath);
@@ -159,6 +171,7 @@ export const audio = async (context) => {
159
171
  fileWriteAgent,
160
172
  ttsOpenaiAgent,
161
173
  ttsNijivoiceAgent,
174
+ ttsGoogleAgent,
162
175
  addBGMAgent,
163
176
  combineAudioFilesAgent,
164
177
  }, { agentFilters });
@@ -0,0 +1,2 @@
1
+ import { MulmoStudioContext } from "../types/index.js";
2
+ export declare const captions: (context: MulmoStudioContext) => Promise<void>;
@@ -0,0 +1,62 @@
1
+ import { GraphAI, GraphAILogger } from "graphai";
2
+ import * as agents from "@graphai/vanilla";
3
+ import { getHTMLFile } from "../utils/file.js";
4
+ import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
5
+ const { default: __, ...vanillaAgents } = agents;
6
+ const graph_data = {
7
+ version: 0.5,
8
+ nodes: {
9
+ context: {},
10
+ map: {
11
+ agent: "mapAgent",
12
+ inputs: { rows: ":context.studio.script.beats", context: ":context" },
13
+ isResult: true,
14
+ params: {
15
+ rowKey: "beat",
16
+ compositeResult: true,
17
+ },
18
+ graph: {
19
+ nodes: {
20
+ test: {
21
+ agent: async (namedInputs) => {
22
+ const { beat, context, index } = namedInputs;
23
+ const { fileDirs } = namedInputs.context;
24
+ const { caption } = context;
25
+ const { imageDirPath } = fileDirs;
26
+ const { canvasSize } = context.studio.script;
27
+ const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
28
+ const template = getHTMLFile("caption");
29
+ const text = (() => {
30
+ const multiLingual = context.studio.multiLingual;
31
+ if (caption && multiLingual) {
32
+ return multiLingual[index].multiLingualTexts[caption].text;
33
+ }
34
+ GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
35
+ return beat.text;
36
+ })();
37
+ const htmlData = interpolate(template, {
38
+ caption: text,
39
+ width: `${canvasSize.width}`,
40
+ height: `${canvasSize.height}`,
41
+ });
42
+ await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
43
+ context.studio.beats[index].captionFile = imagePath;
44
+ return imagePath;
45
+ },
46
+ inputs: {
47
+ beat: ":beat",
48
+ context: ":context",
49
+ index: ":__mapIndex",
50
+ },
51
+ isResult: true,
52
+ },
53
+ },
54
+ },
55
+ },
56
+ },
57
+ };
58
+ export const captions = async (context) => {
59
+ const graph = new GraphAI(graph_data, { ...vanillaAgents });
60
+ graph.injectValue("context", context);
61
+ await graph.run();
62
+ };
@@ -8,6 +8,7 @@ import imageGoogleAgent from "../agents/image_google_agent.js";
8
8
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
9
9
  import { MulmoScriptMethods } from "../methods/index.js";
10
10
  import { imagePlugins } from "../utils/image_plugins/index.js";
11
+ import { imagePrompt } from "../utils/prompt.js";
11
12
  const { default: __, ...vanillaAgents } = agents;
12
13
  dotenv.config();
13
14
  // const openai = new OpenAI();
@@ -35,12 +36,12 @@ const imagePreprocessAgent = async (namedInputs) => {
35
36
  return { path, ...returnValue };
36
37
  }
37
38
  }
38
- const prompt = (beat.imagePrompt || beat.text) + "\n" + (imageParams.style || "");
39
+ const prompt = imagePrompt(beat, imageParams.style);
39
40
  return { path: imagePath, prompt, ...returnValue };
40
41
  };
41
42
  const graph_data = {
42
43
  version: 0.5,
43
- concurrency: 2,
44
+ concurrency: 4,
44
45
  nodes: {
45
46
  context: {},
46
47
  imageDirPath: {},
@@ -3,3 +3,4 @@ export * from "./images.js";
3
3
  export * from "./movie.js";
4
4
  export * from "./pdf.js";
5
5
  export * from "./translate.js";
6
+ export * from "./captions.js";
@@ -3,3 +3,4 @@ export * from "./images.js";
3
3
  export * from "./movie.js";
4
4
  export * from "./pdf.js";
5
5
  export * from "./translate.js";
6
+ export * from "./captions.js";
@@ -1,9 +1,9 @@
1
- import ffmpeg from "fluent-ffmpeg";
2
1
  import { GraphAILogger } from "graphai";
3
2
  import { MulmoScriptMethods } from "../methods/index.js";
4
3
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
5
- const isMac = process.platform === "darwin";
6
- const videoCodec = isMac ? "h264_videotoolbox" : "libx264";
4
+ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
5
+ // const isMac = process.platform === "darwin";
6
+ const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
7
7
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
8
8
  const videoId = `v${inputIndex}`;
9
9
  return {
@@ -14,7 +14,9 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
14
14
  `trim=duration=${duration}`,
15
15
  "fps=30",
16
16
  "setpts=PTS-STARTPTS",
17
- `scale=${canvasInfo.width}:${canvasInfo.height}`,
17
+ `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
18
+ // In case of the aspect ratio mismatch, we fill the extra space with black color.
19
+ `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
18
20
  "setsar=1",
19
21
  "format=yuv420p",
20
22
  ]
@@ -29,112 +31,102 @@ export const getAudioPart = (inputIndex, duration, delay) => {
29
31
  audioId,
30
32
  audioPart: `[${inputIndex}:a]` +
31
33
  `atrim=duration=${duration},` + // Trim to beat duration
32
- `adelay=${delay}|${delay},` +
34
+ `adelay=${delay * 1000}|${delay * 1000},` +
33
35
  `aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
34
36
  `[${audioId}]`,
35
37
  };
36
38
  };
37
39
  const getOutputOption = (audioId) => {
38
40
  return [
39
- "-preset veryfast", // Faster encoding
41
+ "-preset medium", // Changed from veryfast to medium for better compression
40
42
  "-map [v]", // Map the video stream
41
43
  `-map ${audioId}`, // Map the audio stream
42
44
  `-c:v ${videoCodec}`, // Set video codec
45
+ ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
43
46
  "-threads 8",
44
47
  "-filter_threads 8",
45
- "-b:v 5M", // bitrate (only for videotoolbox)
48
+ "-b:v 2M", // Reduced from 5M to 2M
46
49
  "-bufsize",
47
- "10M", // Add buffer size for better quality
50
+ "4M", // Reduced buffer size
48
51
  "-maxrate",
49
- "7M", // Maximum bitrate
52
+ "3M", // Reduced from 7M to 3M
50
53
  "-r 30", // Set frame rate
51
54
  "-pix_fmt yuv420p", // Set pixel format for better compatibility
55
+ "-c:a aac", // Audio codec
56
+ "-b:a 128k", // Audio bitrate
52
57
  ];
53
58
  };
54
- const createVideo = (audioArtifactFilePath, outputVideoPath, studio) => {
55
- return new Promise((resolve, reject) => {
56
- const start = performance.now();
57
- const ffmpegContext = {
58
- command: ffmpeg(),
59
- inputCount: 0,
60
- };
61
- function addInput(input) {
62
- ffmpegContext.command = ffmpegContext.command.input(input);
63
- ffmpegContext.inputCount++;
64
- return ffmpegContext.inputCount - 1; // returned the index of the input
59
+ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
60
+ const start = performance.now();
61
+ const ffmpegContext = FfmpegContextInit();
62
+ if (studio.beats.some((beat) => !beat.imageFile)) {
63
+ GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
64
+ return;
65
+ }
66
+ const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
67
+ // Add each image input
68
+ const filterComplexVideoIds = [];
69
+ const filterComplexAudioIds = [];
70
+ studio.beats.reduce((timestamp, beat, index) => {
71
+ if (!beat.imageFile || !beat.duration) {
72
+ throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
65
73
  }
66
- if (studio.beats.some((beat) => !beat.imageFile)) {
67
- GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
68
- return;
69
- }
70
- const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
71
- const padding = MulmoScriptMethods.getPadding(studio.script) / 1000;
72
- // Add each image input
73
- const filterComplexParts = [];
74
- const filterComplexVideoIds = [];
75
- const filterComplexAudioIds = [];
76
- studio.beats.reduce((timestamp, beat, index) => {
77
- if (!beat.imageFile || !beat.duration) {
78
- throw new Error(`beat.imageFile is not set: index=${index}`);
79
- }
80
- const inputIndex = addInput(beat.imageFile);
81
- const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
82
- const headOrTail = index === 0 || index === studio.beats.length - 1;
83
- const duration = beat.duration + (headOrTail ? padding : 0);
84
- const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
85
- filterComplexVideoIds.push(videoId);
86
- filterComplexParts.push(videoPart);
87
- if (mediaType === "movie") {
88
- const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp * 1000);
89
- filterComplexAudioIds.push(audioId);
90
- filterComplexParts.push(audioPart);
74
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
75
+ const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
76
+ const extraPadding = (() => {
77
+ // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
78
+ if (index === 0) {
79
+ return studio.script.audioParams.introPadding;
91
80
  }
92
- return timestamp + duration;
93
- }, 0);
94
- // console.log("*** images", images.audioIds);
95
- // Concatenate the trimmed images
96
- filterComplexParts.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
97
- const audioIndex = addInput(audioArtifactFilePath); // Add audio input
98
- const artifactAudioId = `${audioIndex}:a`;
99
- const ffmpegContextAudioId = (() => {
100
- if (filterComplexAudioIds.length > 0) {
101
- const mainAudioId = "mainaudio";
102
- const compositeAudioId = "composite";
103
- const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
104
- filterComplexParts.push(`[${artifactAudioId}]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[${mainAudioId}]`);
105
- filterComplexParts.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
106
- return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
81
+ else if (index === studio.beats.length - 1) {
82
+ return studio.script.audioParams.outroPadding;
107
83
  }
108
- return artifactAudioId;
84
+ return 0;
109
85
  })();
110
- // Apply the filter complex for concatenation and map audio input
111
- ffmpegContext.command
112
- .complexFilter(filterComplexParts)
113
- .outputOptions(getOutputOption(ffmpegContextAudioId))
114
- .on("start", (__cmdLine) => {
115
- GraphAILogger.log("Started FFmpeg ..."); // with command:', cmdLine);
116
- })
117
- .on("error", (err, stdout, stderr) => {
118
- GraphAILogger.error("Error occurred:", err);
119
- GraphAILogger.error("FFmpeg stdout:", stdout);
120
- GraphAILogger.error("FFmpeg stderr:", stderr);
121
- GraphAILogger.info("Video creation failed. An unexpected error occurred.");
122
- reject();
123
- })
124
- .on("end", () => {
125
- const end = performance.now();
126
- GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
127
- resolve(0);
128
- })
129
- .output(outputVideoPath)
130
- .run();
131
- });
86
+ const duration = beat.duration + extraPadding;
87
+ const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
88
+ ffmpegContext.filterComplex.push(videoPart);
89
+ if (caption && beat.captionFile) {
90
+ const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
91
+ const compositeVideoId = `c${index}`;
92
+ ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
93
+ filterComplexVideoIds.push(compositeVideoId);
94
+ }
95
+ else {
96
+ filterComplexVideoIds.push(videoId);
97
+ }
98
+ if (mediaType === "movie") {
99
+ const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
100
+ filterComplexAudioIds.push(audioId);
101
+ ffmpegContext.filterComplex.push(audioPart);
102
+ }
103
+ return timestamp + duration;
104
+ }, 0);
105
+ // console.log("*** images", images.audioIds);
106
+ // Concatenate the trimmed images
107
+ ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
108
+ const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
109
+ const artifactAudioId = `${audioIndex}:a`;
110
+ const ffmpegContextAudioId = (() => {
111
+ if (filterComplexAudioIds.length > 0) {
112
+ const mainAudioId = "mainaudio";
113
+ const compositeAudioId = "composite";
114
+ const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
115
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
116
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
117
+ return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
118
+ }
119
+ return artifactAudioId;
120
+ })();
121
+ await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
122
+ const end = performance.now();
123
+ GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
132
124
  };
133
125
  export const movie = async (context) => {
134
- const { studio, fileDirs } = context;
126
+ const { studio, fileDirs, caption } = context;
135
127
  const { outDirPath } = fileDirs;
136
128
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
137
- const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename);
138
- await createVideo(audioArtifactFilePath, outputVideoPath, studio);
129
+ const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
130
+ await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
139
131
  writingMessage(outputVideoPath);
140
132
  };
@@ -2,7 +2,7 @@ import fs from "fs";
2
2
  import path from "path";
3
3
  import { rgb, PDFDocument } from "pdf-lib";
4
4
  import fontkit from "@pdf-lib/fontkit";
5
- import { chunkArray, isHttp } from "../utils/utils.js";
5
+ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
6
6
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
7
7
  import { MulmoScriptMethods } from "../methods/index.js";
8
8
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
@@ -19,7 +19,14 @@ const readImage = async (imagePath, pdfDoc) => {
19
19
  return fs.readFileSync(imagePath);
20
20
  })();
21
21
  const ext = path.extname(imagePath).toLowerCase();
22
- return ext === ".jpg" || ext === ".jpeg" ? await pdfDoc.embedJpg(imageBytes) : await pdfDoc.embedPng(imageBytes);
22
+ if (ext === ".jpg" || ext === ".jpeg") {
23
+ return await pdfDoc.embedJpg(imageBytes);
24
+ }
25
+ if (ext === ".png") {
26
+ return await pdfDoc.embedPng(imageBytes);
27
+ }
28
+ // workaround. TODO: movie, image should convert to png/jpeg image
29
+ return await pdfDoc.embedPng(fs.readFileSync("assets/images/mulmocast_credit.png"));
23
30
  };
24
31
  const pdfSlide = async (pageWidth, pageHeight, imagePaths, pdfDoc) => {
25
32
  const cellRatio = pageHeight / pageWidth;
@@ -183,15 +190,18 @@ const outputSize = (pdfSize, isLandscapeImage, isRotate) => {
183
190
  return { width: 612, height: 792 };
184
191
  };
185
192
  export const pdf = async (context, pdfMode, pdfSize) => {
186
- const { studio, fileDirs } = context;
193
+ const { studio, fileDirs, lang } = context;
194
+ const { multiLingual } = studio;
187
195
  const { outDirPath } = fileDirs;
188
196
  const { width: imageWidth, height: imageHeight } = MulmoScriptMethods.getCanvasSize(studio.script);
189
197
  const isLandscapeImage = imageWidth > imageHeight;
190
198
  const isRotate = pdfMode === "handout";
191
199
  const { width: pageWidth, height: pageHeight } = outputSize(pdfSize, isLandscapeImage, isRotate);
192
200
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
193
- const texts = studio.script.beats.map((beat) => beat.text);
194
- const outputPdfPath = getOutputPdfFilePath(outDirPath, studio.filename, pdfMode);
201
+ const texts = studio.script.beats.map((beat, index) => {
202
+ return localizedText(beat, multiLingual?.[index], lang);
203
+ });
204
+ const outputPdfPath = getOutputPdfFilePath(outDirPath, studio.filename, pdfMode, lang);
195
205
  const pdfDoc = await PDFDocument.create();
196
206
  pdfDoc.registerFontkit(fontkit);
197
207
  const fontBytes = fs.readFileSync("assets/font/NotoSansJP-Regular.ttf");
@@ -5,6 +5,7 @@ import { openAIAgent } from "@graphai/openai_agent";
5
5
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
6
6
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
7
7
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
8
+ import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
8
9
  const { default: __, ...vanillaAgents } = agents;
9
10
  const translateGraph = {
10
11
  version: 0.5,
@@ -25,7 +26,7 @@ const translateGraph = {
25
26
  isResult: true,
26
27
  agent: "mergeObjectAgent",
27
28
  inputs: {
28
- items: [":studio", { beats: ":beatsMap.mergeBeatData" }],
29
+ items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
29
30
  },
30
31
  },
31
32
  beatsMap: {
@@ -43,20 +44,21 @@ const translateGraph = {
43
44
  graph: {
44
45
  version: 0.5,
45
46
  nodes: {
46
- studioBeat: {
47
+ // for cache
48
+ multiLingual: {
47
49
  agent: (namedInputs) => {
48
- return namedInputs.rows[namedInputs.index];
50
+ return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
49
51
  },
50
52
  inputs: {
51
53
  index: ":__mapIndex",
52
- rows: ":studio.beats",
54
+ rows: ":studio.multiLingual",
53
55
  },
54
56
  },
55
- preprocessBeats: {
57
+ preprocessMultiLingual: {
56
58
  agent: "mapAgent",
57
59
  inputs: {
58
60
  beat: ":beat",
59
- studioBeat: ":studioBeat",
61
+ multiLingual: ":multiLingual",
60
62
  rows: ":targetLangs",
61
63
  lang: ":lang.text",
62
64
  studio: ":studio",
@@ -70,12 +72,12 @@ const translateGraph = {
70
72
  nodes: {
71
73
  localizedTexts: {
72
74
  inputs: {
73
- targetLang: ":targetLang",
74
- beat: ":beat",
75
- studioBeat: ":studioBeat",
76
- lang: ":lang",
77
- system: "Please translate the given text into the language specified in language (in locale format, like en, ja, fr, ch).",
78
- prompt: ["## Original Language", ":lang", "", "## Language", ":targetLang", "", "## Target", ":beat.text"],
75
+ targetLang: ":targetLang", // for cache
76
+ beat: ":beat", // for cache
77
+ multiLingual: ":multiLingual", // for cache
78
+ lang: ":lang", // for cache
79
+ system: translateSystemPrompt,
80
+ prompt: translatePrompts,
79
81
  },
80
82
  passThrough: {
81
83
  lang: ":targetLang",
@@ -141,17 +143,17 @@ const translateGraph = {
141
143
  mergeLocalizedText: {
142
144
  agent: "arrayToObjectAgent",
143
145
  inputs: {
144
- items: ":preprocessBeats.ttsTexts",
146
+ items: ":preprocessMultiLingual.ttsTexts",
145
147
  },
146
148
  params: {
147
149
  key: "lang",
148
150
  },
149
151
  },
150
- mergeBeatData: {
152
+ mergeMultiLingualData: {
151
153
  isResult: true,
152
154
  agent: "mergeObjectAgent",
153
155
  inputs: {
154
- items: [":studioBeat", { multiLingualTexts: ":mergeLocalizedText" }],
156
+ items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
155
157
  },
156
158
  },
157
159
  },
@@ -169,14 +171,17 @@ const translateGraph = {
169
171
  };
170
172
  const localizedTextCacheAgentFilter = async (context, next) => {
171
173
  const { namedInputs } = context;
172
- const { targetLang, beat, lang, studioBeat } = namedInputs;
174
+ const { targetLang, beat, lang, multiLingual } = namedInputs;
175
+ if (!beat.text) {
176
+ return { text: "" };
177
+ }
173
178
  // The original text is unchanged and the target language text is present
174
- if (studioBeat.multiLingualTexts &&
175
- studioBeat.multiLingualTexts[lang] &&
176
- studioBeat.multiLingualTexts[lang].text === beat.text &&
177
- studioBeat.multiLingualTexts[targetLang] &&
178
- studioBeat.multiLingualTexts[targetLang].text) {
179
- return { text: studioBeat.multiLingualTexts[targetLang].text };
179
+ if (multiLingual.multiLingualTexts &&
180
+ multiLingual.multiLingualTexts[lang] &&
181
+ multiLingual.multiLingualTexts[lang].text === beat.text &&
182
+ multiLingual.multiLingualTexts[targetLang] &&
183
+ multiLingual.multiLingualTexts[targetLang].text) {
184
+ return { text: multiLingual.multiLingualTexts[targetLang].text };
180
185
  }
181
186
  // same language
182
187
  if (targetLang === lang) {
@@ -205,9 +210,10 @@ export const translate = async (context) => {
205
210
  graph.injectValue("targetLangs", targetLangs);
206
211
  graph.injectValue("outDirPath", outDirPath);
207
212
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
208
- await graph.run();
213
+ const results = await graph.run();
209
214
  writingMessage(outputStudioFilePath);
210
- // const results = await graph.run();
211
- // const mulmoDataResult = results.mergeResult;
212
- // console.log(JSON.stringify(mulmoDataResult, null, 2));
215
+ if (results.mergeStudioResult) {
216
+ context.studio = results.mergeStudioResult;
217
+ }
218
+ // console.log(JSON.stringify(results, null, 2));
213
219
  };