mulmocast 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/README.md +257 -39
  2. package/assets/audio/silent60sec.mp3 +0 -0
  3. package/assets/html/caption.html +45 -0
  4. package/assets/html/chart.html +1 -1
  5. package/assets/html/mermaid.html +6 -2
  6. package/assets/html/tailwind.html +13 -0
  7. package/assets/templates/business.json +57 -4
  8. package/assets/templates/comic_strips.json +35 -0
  9. package/assets/templates/ghibli_strips.json +35 -0
  10. package/lib/actions/audio.js +24 -11
  11. package/lib/actions/captions.d.ts +2 -0
  12. package/lib/actions/captions.js +62 -0
  13. package/lib/actions/images.js +3 -2
  14. package/lib/actions/index.d.ts +1 -0
  15. package/lib/actions/index.js +1 -0
  16. package/lib/actions/movie.js +78 -86
  17. package/lib/actions/pdf.js +15 -5
  18. package/lib/actions/translate.js +32 -26
  19. package/lib/agents/add_bgm_agent.js +15 -39
  20. package/lib/agents/combine_audio_files_agent.js +43 -36
  21. package/lib/agents/index.d.ts +2 -3
  22. package/lib/agents/index.js +2 -3
  23. package/lib/agents/tts_google_agent.d.ts +4 -0
  24. package/lib/agents/tts_google_agent.js +51 -0
  25. package/lib/agents/validate_schema_agent.d.ts +19 -0
  26. package/lib/agents/validate_schema_agent.js +36 -0
  27. package/lib/cli/args.d.ts +2 -0
  28. package/lib/cli/args.js +9 -2
  29. package/lib/cli/bin.d.ts +3 -0
  30. package/lib/cli/bin.js +38 -0
  31. package/lib/cli/cli.js +34 -7
  32. package/lib/cli/commands/audio/builder.d.ts +14 -0
  33. package/lib/cli/commands/audio/builder.js +6 -0
  34. package/lib/cli/commands/audio/handler.d.ts +4 -0
  35. package/lib/cli/commands/audio/handler.js +7 -0
  36. package/lib/cli/commands/audio/index.d.ts +4 -0
  37. package/lib/cli/commands/audio/index.js +4 -0
  38. package/lib/cli/commands/image/builder.d.ts +14 -0
  39. package/lib/cli/commands/image/builder.js +6 -0
  40. package/lib/cli/commands/image/handler.d.ts +4 -0
  41. package/lib/cli/commands/image/handler.js +7 -0
  42. package/lib/cli/commands/image/index.d.ts +4 -0
  43. package/lib/cli/commands/image/index.js +4 -0
  44. package/lib/cli/commands/movie/builder.d.ts +18 -0
  45. package/lib/cli/commands/movie/builder.js +19 -0
  46. package/lib/cli/commands/movie/handler.d.ts +6 -0
  47. package/lib/cli/commands/movie/handler.js +12 -0
  48. package/lib/cli/commands/movie/index.d.ts +4 -0
  49. package/lib/cli/commands/movie/index.js +4 -0
  50. package/lib/cli/commands/pdf/builder.d.ts +18 -0
  51. package/lib/cli/commands/pdf/builder.js +19 -0
  52. package/lib/cli/commands/pdf/handler.d.ts +6 -0
  53. package/lib/cli/commands/pdf/handler.js +8 -0
  54. package/lib/cli/commands/pdf/index.d.ts +4 -0
  55. package/lib/cli/commands/pdf/index.js +4 -0
  56. package/lib/cli/commands/tool/index.d.ts +6 -0
  57. package/lib/cli/commands/tool/index.js +8 -0
  58. package/lib/cli/commands/tool/prompt/builder.d.ts +4 -0
  59. package/lib/cli/commands/tool/prompt/builder.js +11 -0
  60. package/lib/cli/commands/tool/prompt/handler.d.ts +4 -0
  61. package/lib/cli/commands/tool/prompt/handler.js +14 -0
  62. package/lib/cli/commands/tool/prompt/index.d.ts +4 -0
  63. package/lib/cli/commands/tool/prompt/index.js +4 -0
  64. package/lib/cli/commands/tool/schema/builder.d.ts +2 -0
  65. package/lib/cli/commands/tool/schema/builder.js +3 -0
  66. package/lib/cli/commands/tool/schema/handler.d.ts +2 -0
  67. package/lib/cli/commands/tool/schema/handler.js +12 -0
  68. package/lib/cli/commands/tool/schema/index.d.ts +4 -0
  69. package/lib/cli/commands/tool/schema/index.js +4 -0
  70. package/lib/cli/commands/tool/scripting/builder.d.ts +20 -0
  71. package/lib/cli/commands/tool/scripting/builder.js +63 -0
  72. package/lib/cli/commands/tool/scripting/handler.d.ts +12 -0
  73. package/lib/cli/commands/tool/scripting/handler.js +36 -0
  74. package/lib/cli/commands/tool/scripting/index.d.ts +4 -0
  75. package/lib/cli/commands/tool/scripting/index.js +4 -0
  76. package/lib/cli/commands/tool/story_to_script/builder.d.ts +18 -0
  77. package/lib/cli/commands/tool/story_to_script/builder.js +53 -0
  78. package/lib/cli/commands/tool/story_to_script/handler.d.ts +11 -0
  79. package/lib/cli/commands/tool/story_to_script/handler.js +35 -0
  80. package/lib/cli/commands/tool/story_to_script/index.d.ts +4 -0
  81. package/lib/cli/commands/tool/story_to_script/index.js +4 -0
  82. package/lib/cli/commands/translate/builder.d.ts +14 -0
  83. package/lib/cli/commands/translate/builder.js +5 -0
  84. package/lib/cli/commands/translate/handler.d.ts +4 -0
  85. package/lib/cli/commands/translate/handler.js +6 -0
  86. package/lib/cli/commands/translate/index.d.ts +4 -0
  87. package/lib/cli/commands/translate/index.js +4 -0
  88. package/lib/cli/common.d.ts +6 -2
  89. package/lib/cli/common.js +18 -7
  90. package/lib/cli/helpers.d.ts +38 -0
  91. package/lib/cli/helpers.js +115 -0
  92. package/lib/cli/tool-args.d.ts +1 -0
  93. package/lib/cli/tool-args.js +1 -1
  94. package/lib/cli/tool-cli.js +8 -0
  95. package/lib/methods/mulmo_script.d.ts +0 -1
  96. package/lib/methods/mulmo_script.js +4 -7
  97. package/lib/methods/mulmo_script_template.js +2 -12
  98. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
  99. package/lib/tools/create_mulmo_script_from_url.js +43 -14
  100. package/lib/tools/create_mulmo_script_interactively.js +14 -13
  101. package/lib/tools/dump_prompt.js +2 -0
  102. package/lib/tools/story_to_script.d.ts +10 -0
  103. package/lib/tools/story_to_script.js +201 -0
  104. package/lib/types/cli_types.d.ts +14 -0
  105. package/lib/types/cli_types.js +1 -0
  106. package/lib/types/schema.d.ts +493 -176
  107. package/lib/types/schema.js +37 -7
  108. package/lib/types/type.d.ts +6 -1
  109. package/lib/utils/const.d.ts +1 -0
  110. package/lib/utils/const.js +1 -0
  111. package/lib/utils/ffmpeg_utils.d.ts +12 -0
  112. package/lib/utils/ffmpeg_utils.js +63 -0
  113. package/lib/utils/file.d.ts +7 -3
  114. package/lib/utils/file.js +24 -5
  115. package/lib/utils/image_plugins/chart.js +6 -1
  116. package/lib/utils/image_plugins/html_tailwind.d.ts +3 -0
  117. package/lib/utils/image_plugins/html_tailwind.js +18 -0
  118. package/lib/utils/image_plugins/index.d.ts +2 -1
  119. package/lib/utils/image_plugins/index.js +2 -1
  120. package/lib/utils/image_plugins/mermaid.js +1 -1
  121. package/lib/utils/image_plugins/tailwind.d.ts +3 -0
  122. package/lib/utils/image_plugins/tailwind.js +18 -0
  123. package/lib/utils/image_plugins/text_slide.js +9 -2
  124. package/lib/utils/markdown.d.ts +1 -1
  125. package/lib/utils/markdown.js +8 -2
  126. package/lib/utils/preprocess.d.ts +23 -12
  127. package/lib/utils/preprocess.js +4 -0
  128. package/lib/utils/prompt.d.ts +15 -0
  129. package/lib/utils/prompt.js +57 -0
  130. package/lib/utils/utils.d.ts +2 -0
  131. package/lib/utils/utils.js +10 -0
  132. package/package.json +27 -23
@@ -5,17 +5,19 @@ import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
5
5
  import addBGMAgent from "../agents/add_bgm_agent.js";
6
6
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
7
7
  import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
8
+ import ttsGoogleAgent from "../agents/tts_google_agent.js";
8
9
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
9
10
  import { MulmoScriptMethods } from "../methods/index.js";
10
11
  import { fileCacheAgentFilter } from "../utils/filters.js";
11
12
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
12
- import { text2hash } from "../utils/utils.js";
13
+ import { text2hash, localizedText } from "../utils/utils.js";
13
14
  const { default: __, ...vanillaAgents } = agents;
14
15
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
15
16
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
16
17
  const provider_to_agent = {
17
18
  nijivoice: "ttsNijivoiceAgent",
18
19
  openai: "ttsOpenaiAgent",
20
+ google: "ttsGoogleAgent",
19
21
  };
20
22
  const getAudioPath = (context, beat, audioFile, audioDirPath) => {
21
23
  if (beat.audio?.type === "audio") {
@@ -25,23 +27,30 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
25
27
  }
26
28
  throw new Error("Invalid audio source");
27
29
  }
30
+ if (beat.text === "") {
31
+ return undefined; // It indicates that the audio is not needed.
32
+ }
28
33
  return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
29
34
  };
30
35
  const preprocessor = (namedInputs) => {
31
- const { beat, index, context, audioDirPath } = namedInputs;
32
- const studioBeat = context.studio.beats[index];
36
+ const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
37
+ const { lang } = context;
33
38
  const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
34
39
  const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
35
- const hash_string = `${beat.text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
36
- const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}`;
40
+ const text = localizedText(beat, multiLingual, lang);
41
+ const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
42
+ const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
37
43
  const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
38
44
  studioBeat.audioFile = audioPath;
45
+ const needsTTS = !beat.audio && audioPath !== undefined;
39
46
  return {
40
47
  ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
41
48
  studioBeat,
42
49
  voiceId,
43
50
  speechOptions,
44
51
  audioPath,
52
+ text,
53
+ needsTTS,
45
54
  };
46
55
  };
47
56
  const graph_tts = {
@@ -50,16 +59,18 @@ const graph_tts = {
50
59
  agent: preprocessor,
51
60
  inputs: {
52
61
  beat: ":beat",
62
+ studioBeat: ":studioBeat",
63
+ multiLingual: ":multiLingual",
53
64
  index: ":__mapIndex",
54
65
  context: ":context",
55
66
  audioDirPath: ":audioDirPath",
56
67
  },
57
68
  },
58
69
  tts: {
59
- unless: ":beat.audio",
70
+ if: ":preprocessor.needsTTS",
60
71
  agent: ":preprocessor.ttsAgent",
61
72
  inputs: {
62
- text: ":beat.text",
73
+ text: ":preprocessor.text",
63
74
  file: ":preprocessor.audioPath",
64
75
  force: ":context.force",
65
76
  },
@@ -85,13 +96,15 @@ const graph_data = {
85
96
  agent: "mapAgent",
86
97
  inputs: {
87
98
  rows: ":context.studio.script.beats",
88
- studio: ":context.studio",
99
+ studioBeat: ":context.studio.beats",
100
+ multiLingual: ":context.studio.multiLingual",
89
101
  audioDirPath: ":audioDirPath",
90
102
  audioSegmentDirPath: ":audioSegmentDirPath",
91
103
  context: ":context",
92
104
  },
93
105
  params: {
94
106
  rowKey: "beat",
107
+ expandKeys: ["studioBeat", "multiLingual"],
95
108
  },
96
109
  graph: graph_tts,
97
110
  },
@@ -101,7 +114,6 @@ const graph_data = {
101
114
  map: ":map",
102
115
  context: ":context",
103
116
  combinedFileName: ":audioCombinedFilePath",
104
- audioDirPath: ":audioDirPath",
105
117
  },
106
118
  isResult: true,
107
119
  },
@@ -145,11 +157,11 @@ const agentFilters = [
145
157
  },
146
158
  ];
147
159
  export const audio = async (context) => {
148
- const { studio, fileDirs } = context;
160
+ const { studio, fileDirs, lang } = context;
149
161
  const { outDirPath, audioDirPath } = fileDirs;
150
162
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
151
163
  const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
152
- const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename);
164
+ const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
153
165
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
154
166
  mkdir(outDirPath);
155
167
  mkdir(audioSegmentDirPath);
@@ -159,6 +171,7 @@ export const audio = async (context) => {
159
171
  fileWriteAgent,
160
172
  ttsOpenaiAgent,
161
173
  ttsNijivoiceAgent,
174
+ ttsGoogleAgent,
162
175
  addBGMAgent,
163
176
  combineAudioFilesAgent,
164
177
  }, { agentFilters });
@@ -0,0 +1,2 @@
1
+ import { MulmoStudioContext } from "../types/index.js";
2
+ export declare const captions: (context: MulmoStudioContext) => Promise<void>;
@@ -0,0 +1,62 @@
1
+ import { GraphAI, GraphAILogger } from "graphai";
2
+ import * as agents from "@graphai/vanilla";
3
+ import { getHTMLFile } from "../utils/file.js";
4
+ import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
5
+ const { default: __, ...vanillaAgents } = agents;
6
+ const graph_data = {
7
+ version: 0.5,
8
+ nodes: {
9
+ context: {},
10
+ map: {
11
+ agent: "mapAgent",
12
+ inputs: { rows: ":context.studio.script.beats", context: ":context" },
13
+ isResult: true,
14
+ params: {
15
+ rowKey: "beat",
16
+ compositeResult: true,
17
+ },
18
+ graph: {
19
+ nodes: {
20
+ test: {
21
+ agent: async (namedInputs) => {
22
+ const { beat, context, index } = namedInputs;
23
+ const { fileDirs } = namedInputs.context;
24
+ const { caption } = context;
25
+ const { imageDirPath } = fileDirs;
26
+ const { canvasSize } = context.studio.script;
27
+ const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
28
+ const template = getHTMLFile("caption");
29
+ const text = (() => {
30
+ const multiLingual = context.studio.multiLingual;
31
+ if (caption && multiLingual) {
32
+ return multiLingual[index].multiLingualTexts[caption].text;
33
+ }
34
+ GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
35
+ return beat.text;
36
+ })();
37
+ const htmlData = interpolate(template, {
38
+ caption: text,
39
+ width: `${canvasSize.width}`,
40
+ height: `${canvasSize.height}`,
41
+ });
42
+ await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
43
+ context.studio.beats[index].captionFile = imagePath;
44
+ return imagePath;
45
+ },
46
+ inputs: {
47
+ beat: ":beat",
48
+ context: ":context",
49
+ index: ":__mapIndex",
50
+ },
51
+ isResult: true,
52
+ },
53
+ },
54
+ },
55
+ },
56
+ },
57
+ };
58
+ export const captions = async (context) => {
59
+ const graph = new GraphAI(graph_data, { ...vanillaAgents });
60
+ graph.injectValue("context", context);
61
+ await graph.run();
62
+ };
@@ -8,6 +8,7 @@ import imageGoogleAgent from "../agents/image_google_agent.js";
8
8
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
9
9
  import { MulmoScriptMethods } from "../methods/index.js";
10
10
  import { imagePlugins } from "../utils/image_plugins/index.js";
11
+ import { imagePrompt } from "../utils/prompt.js";
11
12
  const { default: __, ...vanillaAgents } = agents;
12
13
  dotenv.config();
13
14
  // const openai = new OpenAI();
@@ -35,12 +36,12 @@ const imagePreprocessAgent = async (namedInputs) => {
35
36
  return { path, ...returnValue };
36
37
  }
37
38
  }
38
- const prompt = (beat.imagePrompt || beat.text) + "\n" + (imageParams.style || "");
39
+ const prompt = imagePrompt(beat, imageParams.style);
39
40
  return { path: imagePath, prompt, ...returnValue };
40
41
  };
41
42
  const graph_data = {
42
43
  version: 0.5,
43
- concurrency: 2,
44
+ concurrency: 4,
44
45
  nodes: {
45
46
  context: {},
46
47
  imageDirPath: {},
@@ -3,3 +3,4 @@ export * from "./images.js";
3
3
  export * from "./movie.js";
4
4
  export * from "./pdf.js";
5
5
  export * from "./translate.js";
6
+ export * from "./captions.js";
@@ -3,3 +3,4 @@ export * from "./images.js";
3
3
  export * from "./movie.js";
4
4
  export * from "./pdf.js";
5
5
  export * from "./translate.js";
6
+ export * from "./captions.js";
@@ -1,9 +1,9 @@
1
- import ffmpeg from "fluent-ffmpeg";
2
1
  import { GraphAILogger } from "graphai";
3
2
  import { MulmoScriptMethods } from "../methods/index.js";
4
3
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
5
- const isMac = process.platform === "darwin";
6
- const videoCodec = isMac ? "h264_videotoolbox" : "libx264";
4
+ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
5
+ // const isMac = process.platform === "darwin";
6
+ const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
7
7
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
8
8
  const videoId = `v${inputIndex}`;
9
9
  return {
@@ -14,7 +14,9 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
14
14
  `trim=duration=${duration}`,
15
15
  "fps=30",
16
16
  "setpts=PTS-STARTPTS",
17
- `scale=${canvasInfo.width}:${canvasInfo.height}`,
17
+ `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
18
+ // In case of the aspect ratio mismatch, we fill the extra space with black color.
19
+ `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
18
20
  "setsar=1",
19
21
  "format=yuv420p",
20
22
  ]
@@ -29,112 +31,102 @@ export const getAudioPart = (inputIndex, duration, delay) => {
29
31
  audioId,
30
32
  audioPart: `[${inputIndex}:a]` +
31
33
  `atrim=duration=${duration},` + // Trim to beat duration
32
- `adelay=${delay}|${delay},` +
34
+ `adelay=${delay * 1000}|${delay * 1000},` +
33
35
  `aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
34
36
  `[${audioId}]`,
35
37
  };
36
38
  };
37
39
  const getOutputOption = (audioId) => {
38
40
  return [
39
- "-preset veryfast", // Faster encoding
41
+ "-preset medium", // Changed from veryfast to medium for better compression
40
42
  "-map [v]", // Map the video stream
41
43
  `-map ${audioId}`, // Map the audio stream
42
44
  `-c:v ${videoCodec}`, // Set video codec
45
+ ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
43
46
  "-threads 8",
44
47
  "-filter_threads 8",
45
- "-b:v 5M", // bitrate (only for videotoolbox)
48
+ "-b:v 2M", // Reduced from 5M to 2M
46
49
  "-bufsize",
47
- "10M", // Add buffer size for better quality
50
+ "4M", // Reduced buffer size
48
51
  "-maxrate",
49
- "7M", // Maximum bitrate
52
+ "3M", // Reduced from 7M to 3M
50
53
  "-r 30", // Set frame rate
51
54
  "-pix_fmt yuv420p", // Set pixel format for better compatibility
55
+ "-c:a aac", // Audio codec
56
+ "-b:a 128k", // Audio bitrate
52
57
  ];
53
58
  };
54
- const createVideo = (audioArtifactFilePath, outputVideoPath, studio) => {
55
- return new Promise((resolve, reject) => {
56
- const start = performance.now();
57
- const ffmpegContext = {
58
- command: ffmpeg(),
59
- inputCount: 0,
60
- };
61
- function addInput(input) {
62
- ffmpegContext.command = ffmpegContext.command.input(input);
63
- ffmpegContext.inputCount++;
64
- return ffmpegContext.inputCount - 1; // returned the index of the input
59
+ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
60
+ const start = performance.now();
61
+ const ffmpegContext = FfmpegContextInit();
62
+ if (studio.beats.some((beat) => !beat.imageFile)) {
63
+ GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
64
+ return;
65
+ }
66
+ const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
67
+ // Add each image input
68
+ const filterComplexVideoIds = [];
69
+ const filterComplexAudioIds = [];
70
+ studio.beats.reduce((timestamp, beat, index) => {
71
+ if (!beat.imageFile || !beat.duration) {
72
+ throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
65
73
  }
66
- if (studio.beats.some((beat) => !beat.imageFile)) {
67
- GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
68
- return;
69
- }
70
- const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
71
- const padding = MulmoScriptMethods.getPadding(studio.script) / 1000;
72
- // Add each image input
73
- const filterComplexParts = [];
74
- const filterComplexVideoIds = [];
75
- const filterComplexAudioIds = [];
76
- studio.beats.reduce((timestamp, beat, index) => {
77
- if (!beat.imageFile || !beat.duration) {
78
- throw new Error(`beat.imageFile is not set: index=${index}`);
79
- }
80
- const inputIndex = addInput(beat.imageFile);
81
- const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
82
- const headOrTail = index === 0 || index === studio.beats.length - 1;
83
- const duration = beat.duration + (headOrTail ? padding : 0);
84
- const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
85
- filterComplexVideoIds.push(videoId);
86
- filterComplexParts.push(videoPart);
87
- if (mediaType === "movie") {
88
- const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp * 1000);
89
- filterComplexAudioIds.push(audioId);
90
- filterComplexParts.push(audioPart);
74
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
75
+ const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
76
+ const extraPadding = (() => {
77
+ // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
78
+ if (index === 0) {
79
+ return studio.script.audioParams.introPadding;
91
80
  }
92
- return timestamp + duration;
93
- }, 0);
94
- // console.log("*** images", images.audioIds);
95
- // Concatenate the trimmed images
96
- filterComplexParts.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
97
- const audioIndex = addInput(audioArtifactFilePath); // Add audio input
98
- const artifactAudioId = `${audioIndex}:a`;
99
- const ffmpegContextAudioId = (() => {
100
- if (filterComplexAudioIds.length > 0) {
101
- const mainAudioId = "mainaudio";
102
- const compositeAudioId = "composite";
103
- const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
104
- filterComplexParts.push(`[${artifactAudioId}]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[${mainAudioId}]`);
105
- filterComplexParts.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
106
- return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
81
+ else if (index === studio.beats.length - 1) {
82
+ return studio.script.audioParams.outroPadding;
107
83
  }
108
- return artifactAudioId;
84
+ return 0;
109
85
  })();
110
- // Apply the filter complex for concatenation and map audio input
111
- ffmpegContext.command
112
- .complexFilter(filterComplexParts)
113
- .outputOptions(getOutputOption(ffmpegContextAudioId))
114
- .on("start", (__cmdLine) => {
115
- GraphAILogger.log("Started FFmpeg ..."); // with command:', cmdLine);
116
- })
117
- .on("error", (err, stdout, stderr) => {
118
- GraphAILogger.error("Error occurred:", err);
119
- GraphAILogger.error("FFmpeg stdout:", stdout);
120
- GraphAILogger.error("FFmpeg stderr:", stderr);
121
- GraphAILogger.info("Video creation failed. An unexpected error occurred.");
122
- reject();
123
- })
124
- .on("end", () => {
125
- const end = performance.now();
126
- GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
127
- resolve(0);
128
- })
129
- .output(outputVideoPath)
130
- .run();
131
- });
86
+ const duration = beat.duration + extraPadding;
87
+ const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
88
+ ffmpegContext.filterComplex.push(videoPart);
89
+ if (caption && beat.captionFile) {
90
+ const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
91
+ const compositeVideoId = `c${index}`;
92
+ ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
93
+ filterComplexVideoIds.push(compositeVideoId);
94
+ }
95
+ else {
96
+ filterComplexVideoIds.push(videoId);
97
+ }
98
+ if (mediaType === "movie") {
99
+ const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
100
+ filterComplexAudioIds.push(audioId);
101
+ ffmpegContext.filterComplex.push(audioPart);
102
+ }
103
+ return timestamp + duration;
104
+ }, 0);
105
+ // console.log("*** images", images.audioIds);
106
+ // Concatenate the trimmed images
107
+ ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
108
+ const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
109
+ const artifactAudioId = `${audioIndex}:a`;
110
+ const ffmpegContextAudioId = (() => {
111
+ if (filterComplexAudioIds.length > 0) {
112
+ const mainAudioId = "mainaudio";
113
+ const compositeAudioId = "composite";
114
+ const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
115
+ FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
116
+ ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
117
+ return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
118
+ }
119
+ return artifactAudioId;
120
+ })();
121
+ await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
122
+ const end = performance.now();
123
+ GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
132
124
  };
133
125
  export const movie = async (context) => {
134
- const { studio, fileDirs } = context;
126
+ const { studio, fileDirs, caption } = context;
135
127
  const { outDirPath } = fileDirs;
136
128
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
137
- const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename);
138
- await createVideo(audioArtifactFilePath, outputVideoPath, studio);
129
+ const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
130
+ await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
139
131
  writingMessage(outputVideoPath);
140
132
  };
@@ -2,7 +2,7 @@ import fs from "fs";
2
2
  import path from "path";
3
3
  import { rgb, PDFDocument } from "pdf-lib";
4
4
  import fontkit from "@pdf-lib/fontkit";
5
- import { chunkArray, isHttp } from "../utils/utils.js";
5
+ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
6
6
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
7
7
  import { MulmoScriptMethods } from "../methods/index.js";
8
8
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
@@ -19,7 +19,14 @@ const readImage = async (imagePath, pdfDoc) => {
19
19
  return fs.readFileSync(imagePath);
20
20
  })();
21
21
  const ext = path.extname(imagePath).toLowerCase();
22
- return ext === ".jpg" || ext === ".jpeg" ? await pdfDoc.embedJpg(imageBytes) : await pdfDoc.embedPng(imageBytes);
22
+ if (ext === ".jpg" || ext === ".jpeg") {
23
+ return await pdfDoc.embedJpg(imageBytes);
24
+ }
25
+ if (ext === ".png") {
26
+ return await pdfDoc.embedPng(imageBytes);
27
+ }
28
+ // workaround. TODO: movie, image should convert to png/jpeg image
29
+ return await pdfDoc.embedPng(fs.readFileSync("assets/images/mulmocast_credit.png"));
23
30
  };
24
31
  const pdfSlide = async (pageWidth, pageHeight, imagePaths, pdfDoc) => {
25
32
  const cellRatio = pageHeight / pageWidth;
@@ -183,15 +190,18 @@ const outputSize = (pdfSize, isLandscapeImage, isRotate) => {
183
190
  return { width: 612, height: 792 };
184
191
  };
185
192
  export const pdf = async (context, pdfMode, pdfSize) => {
186
- const { studio, fileDirs } = context;
193
+ const { studio, fileDirs, lang } = context;
194
+ const { multiLingual } = studio;
187
195
  const { outDirPath } = fileDirs;
188
196
  const { width: imageWidth, height: imageHeight } = MulmoScriptMethods.getCanvasSize(studio.script);
189
197
  const isLandscapeImage = imageWidth > imageHeight;
190
198
  const isRotate = pdfMode === "handout";
191
199
  const { width: pageWidth, height: pageHeight } = outputSize(pdfSize, isLandscapeImage, isRotate);
192
200
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
193
- const texts = studio.script.beats.map((beat) => beat.text);
194
- const outputPdfPath = getOutputPdfFilePath(outDirPath, studio.filename, pdfMode);
201
+ const texts = studio.script.beats.map((beat, index) => {
202
+ return localizedText(beat, multiLingual?.[index], lang);
203
+ });
204
+ const outputPdfPath = getOutputPdfFilePath(outDirPath, studio.filename, pdfMode, lang);
195
205
  const pdfDoc = await PDFDocument.create();
196
206
  pdfDoc.registerFontkit(fontkit);
197
207
  const fontBytes = fs.readFileSync("assets/font/NotoSansJP-Regular.ttf");
@@ -5,6 +5,7 @@ import { openAIAgent } from "@graphai/openai_agent";
5
5
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
6
6
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
7
7
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
8
+ import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
8
9
  const { default: __, ...vanillaAgents } = agents;
9
10
  const translateGraph = {
10
11
  version: 0.5,
@@ -25,7 +26,7 @@ const translateGraph = {
25
26
  isResult: true,
26
27
  agent: "mergeObjectAgent",
27
28
  inputs: {
28
- items: [":studio", { beats: ":beatsMap.mergeBeatData" }],
29
+ items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
29
30
  },
30
31
  },
31
32
  beatsMap: {
@@ -43,20 +44,21 @@ const translateGraph = {
43
44
  graph: {
44
45
  version: 0.5,
45
46
  nodes: {
46
- studioBeat: {
47
+ // for cache
48
+ multiLingual: {
47
49
  agent: (namedInputs) => {
48
- return namedInputs.rows[namedInputs.index];
50
+ return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
49
51
  },
50
52
  inputs: {
51
53
  index: ":__mapIndex",
52
- rows: ":studio.beats",
54
+ rows: ":studio.multiLingual",
53
55
  },
54
56
  },
55
- preprocessBeats: {
57
+ preprocessMultiLingual: {
56
58
  agent: "mapAgent",
57
59
  inputs: {
58
60
  beat: ":beat",
59
- studioBeat: ":studioBeat",
61
+ multiLingual: ":multiLingual",
60
62
  rows: ":targetLangs",
61
63
  lang: ":lang.text",
62
64
  studio: ":studio",
@@ -70,12 +72,12 @@ const translateGraph = {
70
72
  nodes: {
71
73
  localizedTexts: {
72
74
  inputs: {
73
- targetLang: ":targetLang",
74
- beat: ":beat",
75
- studioBeat: ":studioBeat",
76
- lang: ":lang",
77
- system: "Please translate the given text into the language specified in language (in locale format, like en, ja, fr, ch).",
78
- prompt: ["## Original Language", ":lang", "", "## Language", ":targetLang", "", "## Target", ":beat.text"],
75
+ targetLang: ":targetLang", // for cache
76
+ beat: ":beat", // for cache
77
+ multiLingual: ":multiLingual", // for cache
78
+ lang: ":lang", // for cache
79
+ system: translateSystemPrompt,
80
+ prompt: translatePrompts,
79
81
  },
80
82
  passThrough: {
81
83
  lang: ":targetLang",
@@ -141,17 +143,17 @@ const translateGraph = {
141
143
  mergeLocalizedText: {
142
144
  agent: "arrayToObjectAgent",
143
145
  inputs: {
144
- items: ":preprocessBeats.ttsTexts",
146
+ items: ":preprocessMultiLingual.ttsTexts",
145
147
  },
146
148
  params: {
147
149
  key: "lang",
148
150
  },
149
151
  },
150
- mergeBeatData: {
152
+ mergeMultiLingualData: {
151
153
  isResult: true,
152
154
  agent: "mergeObjectAgent",
153
155
  inputs: {
154
- items: [":studioBeat", { multiLingualTexts: ":mergeLocalizedText" }],
156
+ items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
155
157
  },
156
158
  },
157
159
  },
@@ -169,14 +171,17 @@ const translateGraph = {
169
171
  };
170
172
  const localizedTextCacheAgentFilter = async (context, next) => {
171
173
  const { namedInputs } = context;
172
- const { targetLang, beat, lang, studioBeat } = namedInputs;
174
+ const { targetLang, beat, lang, multiLingual } = namedInputs;
175
+ if (!beat.text) {
176
+ return { text: "" };
177
+ }
173
178
  // The original text is unchanged and the target language text is present
174
- if (studioBeat.multiLingualTexts &&
175
- studioBeat.multiLingualTexts[lang] &&
176
- studioBeat.multiLingualTexts[lang].text === beat.text &&
177
- studioBeat.multiLingualTexts[targetLang] &&
178
- studioBeat.multiLingualTexts[targetLang].text) {
179
- return { text: studioBeat.multiLingualTexts[targetLang].text };
179
+ if (multiLingual.multiLingualTexts &&
180
+ multiLingual.multiLingualTexts[lang] &&
181
+ multiLingual.multiLingualTexts[lang].text === beat.text &&
182
+ multiLingual.multiLingualTexts[targetLang] &&
183
+ multiLingual.multiLingualTexts[targetLang].text) {
184
+ return { text: multiLingual.multiLingualTexts[targetLang].text };
180
185
  }
181
186
  // same language
182
187
  if (targetLang === lang) {
@@ -205,9 +210,10 @@ export const translate = async (context) => {
205
210
  graph.injectValue("targetLangs", targetLangs);
206
211
  graph.injectValue("outDirPath", outDirPath);
207
212
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
208
- await graph.run();
213
+ const results = await graph.run();
209
214
  writingMessage(outputStudioFilePath);
210
- // const results = await graph.run();
211
- // const mulmoDataResult = results.mergeResult;
212
- // console.log(JSON.stringify(mulmoDataResult, null, 2));
215
+ if (results.mergeStudioResult) {
216
+ context.studio = results.mergeStudioResult;
217
+ }
218
+ // console.log(JSON.stringify(results, null, 2));
213
219
  };