mulmocast 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/README.md +294 -39
  2. package/assets/audio/silent60sec.mp3 +0 -0
  3. package/assets/html/caption.html +45 -0
  4. package/assets/html/chart.html +1 -1
  5. package/assets/html/mermaid.html +6 -2
  6. package/assets/html/tailwind.html +13 -0
  7. package/assets/templates/business.json +2 -128
  8. package/assets/templates/children_book.json +1 -128
  9. package/assets/templates/coding.json +2 -136
  10. package/assets/templates/comic_strips.json +6 -0
  11. package/assets/templates/ghibli_strips.json +6 -0
  12. package/assets/templates/sensei_and_taro.json +1 -118
  13. package/lib/actions/audio.js +62 -39
  14. package/lib/actions/captions.d.ts +2 -0
  15. package/lib/actions/captions.js +75 -0
  16. package/lib/actions/images.js +34 -13
  17. package/lib/actions/index.d.ts +1 -0
  18. package/lib/actions/index.js +1 -0
  19. package/lib/actions/movie.js +102 -101
  20. package/lib/actions/pdf.js +26 -6
  21. package/lib/actions/translate.js +60 -39
  22. package/lib/agents/add_bgm_agent.js +15 -39
  23. package/lib/agents/combine_audio_files_agent.js +53 -35
  24. package/lib/agents/index.d.ts +2 -3
  25. package/lib/agents/index.js +2 -3
  26. package/lib/agents/tts_google_agent.d.ts +4 -0
  27. package/lib/agents/tts_google_agent.js +51 -0
  28. package/lib/agents/validate_schema_agent.d.ts +19 -0
  29. package/lib/agents/validate_schema_agent.js +36 -0
  30. package/lib/cli/args.d.ts +2 -0
  31. package/lib/cli/args.js +9 -2
  32. package/lib/cli/bin.d.ts +3 -0
  33. package/lib/cli/bin.js +38 -0
  34. package/lib/cli/cli.js +34 -7
  35. package/lib/cli/commands/audio/builder.d.ts +14 -0
  36. package/lib/cli/commands/audio/builder.js +6 -0
  37. package/lib/cli/commands/audio/handler.d.ts +4 -0
  38. package/lib/cli/commands/audio/handler.js +7 -0
  39. package/lib/cli/commands/audio/index.d.ts +4 -0
  40. package/lib/cli/commands/audio/index.js +4 -0
  41. package/lib/cli/commands/image/builder.d.ts +14 -0
  42. package/lib/cli/commands/image/builder.js +6 -0
  43. package/lib/cli/commands/image/handler.d.ts +4 -0
  44. package/lib/cli/commands/image/handler.js +7 -0
  45. package/lib/cli/commands/image/index.d.ts +4 -0
  46. package/lib/cli/commands/image/index.js +4 -0
  47. package/lib/cli/commands/movie/builder.d.ts +18 -0
  48. package/lib/cli/commands/movie/builder.js +19 -0
  49. package/lib/cli/commands/movie/handler.d.ts +6 -0
  50. package/lib/cli/commands/movie/handler.js +12 -0
  51. package/lib/cli/commands/movie/index.d.ts +4 -0
  52. package/lib/cli/commands/movie/index.js +4 -0
  53. package/lib/cli/commands/pdf/builder.d.ts +18 -0
  54. package/lib/cli/commands/pdf/builder.js +19 -0
  55. package/lib/cli/commands/pdf/handler.d.ts +6 -0
  56. package/lib/cli/commands/pdf/handler.js +8 -0
  57. package/lib/cli/commands/pdf/index.d.ts +4 -0
  58. package/lib/cli/commands/pdf/index.js +4 -0
  59. package/lib/cli/commands/tool/index.d.ts +6 -0
  60. package/lib/cli/commands/tool/index.js +8 -0
  61. package/lib/cli/commands/tool/prompt/builder.d.ts +4 -0
  62. package/lib/cli/commands/tool/prompt/builder.js +11 -0
  63. package/lib/cli/commands/tool/prompt/handler.d.ts +4 -0
  64. package/lib/cli/commands/tool/prompt/handler.js +14 -0
  65. package/lib/cli/commands/tool/prompt/index.d.ts +4 -0
  66. package/lib/cli/commands/tool/prompt/index.js +4 -0
  67. package/lib/cli/commands/tool/schema/builder.d.ts +2 -0
  68. package/lib/cli/commands/tool/schema/builder.js +3 -0
  69. package/lib/cli/commands/tool/schema/handler.d.ts +2 -0
  70. package/lib/cli/commands/tool/schema/handler.js +12 -0
  71. package/lib/cli/commands/tool/schema/index.d.ts +4 -0
  72. package/lib/cli/commands/tool/schema/index.js +4 -0
  73. package/lib/cli/commands/tool/scripting/builder.d.ts +20 -0
  74. package/lib/cli/commands/tool/scripting/builder.js +63 -0
  75. package/lib/cli/commands/tool/scripting/handler.d.ts +13 -0
  76. package/lib/cli/commands/tool/scripting/handler.js +36 -0
  77. package/lib/cli/commands/tool/scripting/index.d.ts +4 -0
  78. package/lib/cli/commands/tool/scripting/index.js +4 -0
  79. package/lib/cli/commands/tool/story_to_script/builder.d.ts +20 -0
  80. package/lib/cli/commands/tool/story_to_script/builder.js +61 -0
  81. package/lib/cli/commands/tool/story_to_script/handler.d.ts +13 -0
  82. package/lib/cli/commands/tool/story_to_script/handler.js +36 -0
  83. package/lib/cli/commands/tool/story_to_script/index.d.ts +4 -0
  84. package/lib/cli/commands/tool/story_to_script/index.js +4 -0
  85. package/lib/cli/commands/translate/builder.d.ts +14 -0
  86. package/lib/cli/commands/translate/builder.js +5 -0
  87. package/lib/cli/commands/translate/handler.d.ts +4 -0
  88. package/lib/cli/commands/translate/handler.js +6 -0
  89. package/lib/cli/commands/translate/index.d.ts +4 -0
  90. package/lib/cli/commands/translate/index.js +4 -0
  91. package/lib/cli/common.d.ts +6 -2
  92. package/lib/cli/common.js +18 -7
  93. package/lib/cli/helpers.d.ts +38 -0
  94. package/lib/cli/helpers.js +115 -0
  95. package/lib/cli/tool-args.d.ts +1 -0
  96. package/lib/cli/tool-args.js +1 -1
  97. package/lib/cli/tool-cli.js +8 -0
  98. package/lib/methods/mulmo_script.d.ts +0 -1
  99. package/lib/methods/mulmo_script.js +4 -7
  100. package/lib/methods/mulmo_script_template.d.ts +2 -2
  101. package/lib/methods/mulmo_script_template.js +3 -13
  102. package/lib/methods/mulmo_studio.d.ts +8 -0
  103. package/lib/methods/mulmo_studio.js +24 -0
  104. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
  105. package/lib/tools/create_mulmo_script_from_url.js +43 -14
  106. package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
  107. package/lib/tools/create_mulmo_script_interactively.js +21 -20
  108. package/lib/tools/dump_prompt.js +2 -0
  109. package/lib/tools/story_to_script.d.ts +12 -0
  110. package/lib/tools/story_to_script.js +275 -0
  111. package/lib/types/cli_types.d.ts +14 -0
  112. package/lib/types/cli_types.js +1 -0
  113. package/lib/types/schema.d.ts +637 -1766
  114. package/lib/types/schema.js +77 -8
  115. package/lib/types/type.d.ts +10 -3
  116. package/lib/utils/const.d.ts +5 -0
  117. package/lib/utils/const.js +5 -0
  118. package/lib/utils/ffmpeg_utils.d.ts +12 -0
  119. package/lib/utils/ffmpeg_utils.js +63 -0
  120. package/lib/utils/file.d.ts +8 -3
  121. package/lib/utils/file.js +40 -9
  122. package/lib/utils/filters.js +16 -11
  123. package/lib/utils/image_plugins/chart.js +6 -1
  124. package/lib/utils/image_plugins/html_tailwind.d.ts +3 -0
  125. package/lib/utils/image_plugins/html_tailwind.js +18 -0
  126. package/lib/utils/image_plugins/index.d.ts +2 -1
  127. package/lib/utils/image_plugins/index.js +2 -1
  128. package/lib/utils/image_plugins/mermaid.js +1 -1
  129. package/lib/utils/image_plugins/tailwind.d.ts +3 -0
  130. package/lib/utils/image_plugins/tailwind.js +18 -0
  131. package/lib/utils/image_plugins/text_slide.js +9 -2
  132. package/lib/utils/markdown.d.ts +1 -1
  133. package/lib/utils/markdown.js +8 -4
  134. package/lib/utils/preprocess.d.ts +40 -10
  135. package/lib/utils/preprocess.js +7 -2
  136. package/lib/utils/prompt.d.ts +16 -0
  137. package/lib/utils/prompt.js +74 -0
  138. package/lib/utils/utils.d.ts +10 -5
  139. package/lib/utils/utils.js +37 -17
  140. package/package.json +27 -23
@@ -5,6 +5,8 @@ import { openAIAgent } from "@graphai/openai_agent";
5
5
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
6
6
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
7
7
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
8
+ import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
9
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
8
10
  const { default: __, ...vanillaAgents } = agents;
9
11
  const translateGraph = {
10
12
  version: 0.5,
@@ -25,7 +27,7 @@ const translateGraph = {
25
27
  isResult: true,
26
28
  agent: "mergeObjectAgent",
27
29
  inputs: {
28
- items: [":studio", { beats: ":beatsMap.mergeBeatData" }],
30
+ items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
29
31
  },
30
32
  },
31
33
  beatsMap: {
@@ -43,23 +45,25 @@ const translateGraph = {
43
45
  graph: {
44
46
  version: 0.5,
45
47
  nodes: {
46
- studioBeat: {
48
+ // for cache
49
+ multiLingual: {
47
50
  agent: (namedInputs) => {
48
- return namedInputs.rows[namedInputs.index];
51
+ return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
49
52
  },
50
53
  inputs: {
51
54
  index: ":__mapIndex",
52
- rows: ":studio.beats",
55
+ rows: ":studio.multiLingual",
53
56
  },
54
57
  },
55
- preprocessBeats: {
58
+ preprocessMultiLingual: {
56
59
  agent: "mapAgent",
57
60
  inputs: {
58
61
  beat: ":beat",
59
- studioBeat: ":studioBeat",
62
+ multiLingual: ":multiLingual",
60
63
  rows: ":targetLangs",
61
64
  lang: ":lang.text",
62
65
  studio: ":studio",
66
+ beatIndex: ":__mapIndex",
63
67
  },
64
68
  params: {
65
69
  compositeResult: true,
@@ -70,12 +74,14 @@ const translateGraph = {
70
74
  nodes: {
71
75
  localizedTexts: {
72
76
  inputs: {
73
- targetLang: ":targetLang",
74
- beat: ":beat",
75
- studioBeat: ":studioBeat",
76
- lang: ":lang",
77
- system: "Please translate the given text into the language specified in language (in locale format, like en, ja, fr, ch).",
78
- prompt: ["## Original Language", ":lang", "", "## Language", ":targetLang", "", "## Target", ":beat.text"],
77
+ targetLang: ":targetLang", // for cache
78
+ beat: ":beat", // for cache
79
+ multiLingual: ":multiLingual", // for cache
80
+ lang: ":lang", // for cache
81
+ beatIndex: ":beatIndex", // for cache
82
+ studio: ":studio", // for cache
83
+ system: translateSystemPrompt,
84
+ prompt: translatePrompts,
79
85
  },
80
86
  passThrough: {
81
87
  lang: ":targetLang",
@@ -141,17 +147,17 @@ const translateGraph = {
141
147
  mergeLocalizedText: {
142
148
  agent: "arrayToObjectAgent",
143
149
  inputs: {
144
- items: ":preprocessBeats.ttsTexts",
150
+ items: ":preprocessMultiLingual.ttsTexts",
145
151
  },
146
152
  params: {
147
153
  key: "lang",
148
154
  },
149
155
  },
150
- mergeBeatData: {
156
+ mergeMultiLingualData: {
151
157
  isResult: true,
152
158
  agent: "mergeObjectAgent",
153
159
  inputs: {
154
- items: [":studioBeat", { multiLingualTexts: ":mergeLocalizedText" }],
160
+ items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
155
161
  },
156
162
  },
157
163
  },
@@ -169,20 +175,29 @@ const translateGraph = {
169
175
  };
170
176
  const localizedTextCacheAgentFilter = async (context, next) => {
171
177
  const { namedInputs } = context;
172
- const { targetLang, beat, lang, studioBeat } = namedInputs;
178
+ const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
179
+ if (!beat.text) {
180
+ return { text: "" };
181
+ }
173
182
  // The original text is unchanged and the target language text is present
174
- if (studioBeat.multiLingualTexts &&
175
- studioBeat.multiLingualTexts[lang] &&
176
- studioBeat.multiLingualTexts[lang].text === beat.text &&
177
- studioBeat.multiLingualTexts[targetLang] &&
178
- studioBeat.multiLingualTexts[targetLang].text) {
179
- return { text: studioBeat.multiLingualTexts[targetLang].text };
183
+ if (multiLingual.multiLingualTexts &&
184
+ multiLingual.multiLingualTexts[lang] &&
185
+ multiLingual.multiLingualTexts[lang].text === beat.text &&
186
+ multiLingual.multiLingualTexts[targetLang] &&
187
+ multiLingual.multiLingualTexts[targetLang].text) {
188
+ return { text: multiLingual.multiLingualTexts[targetLang].text };
180
189
  }
181
190
  // same language
182
191
  if (targetLang === lang) {
183
192
  return { text: beat.text };
184
193
  }
185
- return await next(context);
194
+ try {
195
+ MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
196
+ return await next(context);
197
+ }
198
+ finally {
199
+ MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
200
+ }
186
201
  };
187
202
  const agentFilters = [
188
203
  {
@@ -194,20 +209,26 @@ const agentFilters = [
194
209
  const defaultLang = "en";
195
210
  const targetLangs = ["ja", "en"];
196
211
  export const translate = async (context) => {
197
- const { studio, fileDirs } = context;
198
- const { outDirPath } = fileDirs;
199
- const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
200
- mkdir(outDirPath);
201
- assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
202
- const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
203
- graph.injectValue("studio", studio);
204
- graph.injectValue("defaultLang", defaultLang);
205
- graph.injectValue("targetLangs", targetLangs);
206
- graph.injectValue("outDirPath", outDirPath);
207
- graph.injectValue("outputStudioFilePath", outputStudioFilePath);
208
- await graph.run();
209
- writingMessage(outputStudioFilePath);
210
- // const results = await graph.run();
211
- // const mulmoDataResult = results.mergeResult;
212
- // console.log(JSON.stringify(mulmoDataResult, null, 2));
212
+ try {
213
+ MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
214
+ const { studio, fileDirs } = context;
215
+ const { outDirPath } = fileDirs;
216
+ const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
217
+ mkdir(outDirPath);
218
+ assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
219
+ const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
220
+ graph.injectValue("studio", studio);
221
+ graph.injectValue("defaultLang", defaultLang);
222
+ graph.injectValue("targetLangs", targetLangs);
223
+ graph.injectValue("outDirPath", outDirPath);
224
+ graph.injectValue("outputStudioFilePath", outputStudioFilePath);
225
+ const results = await graph.run();
226
+ writingMessage(outputStudioFilePath);
227
+ if (results.mergeStudioResult) {
228
+ context.studio = results.mergeStudioResult;
229
+ }
230
+ }
231
+ finally {
232
+ MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
233
+ }
213
234
  };
@@ -1,46 +1,22 @@
1
1
  import { GraphAILogger } from "graphai";
2
- import ffmpeg from "fluent-ffmpeg";
3
- import { MulmoScriptMethods } from "../methods/index.js";
2
+ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
4
3
  const addBGMAgent = async ({ namedInputs, params, }) => {
5
4
  const { voiceFile, outputFile, script } = namedInputs;
6
5
  const { musicFile } = params;
7
- const promise = new Promise((resolve, reject) => {
8
- ffmpeg.ffprobe(voiceFile, (err, metadata) => {
9
- if (err) {
10
- GraphAILogger.info("Error getting metadata: " + err.message);
11
- reject(err);
12
- }
13
- const speechDuration = metadata.format.duration;
14
- const padding = MulmoScriptMethods.getPadding(script);
15
- const totalDuration = (padding * 2) / 1000 + Math.round(speechDuration ?? 0);
16
- GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
17
- const command = ffmpeg();
18
- command
19
- .input(musicFile)
20
- .input(voiceFile)
21
- .complexFilter([
22
- // Add a 2-second delay to the speech
23
- `[1:a]adelay=${padding}|${padding}, volume=4[a1]`, // 4000ms delay for both left and right channels
24
- // Set the background music volume to 0.2
25
- `[0:a]volume=0.2[a0]`,
26
- // Mix the delayed speech and the background music
27
- `[a0][a1]amix=inputs=2:duration=longest:dropout_transition=3[amixed]`,
28
- // Trim the output to the length of speech + 8 seconds
29
- `[amixed]atrim=start=0:end=${totalDuration}[trimmed]`,
30
- // Add fade out effect for the last 4 seconds
31
- `[trimmed]afade=t=out:st=${totalDuration - padding / 1000}:d=${padding}`,
32
- ])
33
- .on("error", (err) => {
34
- GraphAILogger.info("Error: " + err.message);
35
- reject(err);
36
- })
37
- .on("end", () => {
38
- resolve(0);
39
- })
40
- .save(outputFile);
41
- });
42
- });
43
- await promise;
6
+ const speechDuration = await ffmpegGetMediaDuration(voiceFile);
7
+ const introPadding = script.audioParams.introPadding;
8
+ const outroPadding = script.audioParams.outroPadding;
9
+ const totalDuration = speechDuration + introPadding + outroPadding;
10
+ GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
11
+ const ffmpegContext = FfmpegContextInit();
12
+ const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile);
13
+ const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
14
+ ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=0.2[music]`);
15
+ ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=2, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
16
+ ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
17
+ ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
18
+ ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
19
+ await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
44
20
  return outputFile;
45
21
  };
46
22
  const addBGMAgentInfo = {
@@ -1,45 +1,63 @@
1
1
  import { GraphAILogger } from "graphai";
2
- import ffmpeg from "fluent-ffmpeg";
3
- import { silentPath, silentLastPath } from "../utils/file.js";
4
- const combineAudioFilesAgent = async ({ namedInputs }) => {
5
- const { context, combinedFileName, audioDirPath } = namedInputs;
6
- const command = ffmpeg();
7
- const getDuration = (filePath, isLastGap) => {
8
- return new Promise((resolve, reject) => {
9
- ffmpeg.ffprobe(filePath, (err, metadata) => {
10
- if (err) {
11
- GraphAILogger.info("Error while getting metadata:", err);
12
- reject(err);
2
+ import { silent60secPath } from "../utils/file.js";
3
+ import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
4
+ const combineAudioFilesAgent = async ({ namedInputs, }) => {
5
+ const { context, combinedFileName } = namedInputs;
6
+ const ffmpegContext = FfmpegContextInit();
7
+ const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath);
8
+ // We cannot reuse longSilentId. We need to explicitly split it for each beat.
9
+ const silentIds = context.studio.beats.map((_, index) => `[ls_${index}]`);
10
+ ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
11
+ const inputIds = (await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
12
+ const beat = context.studio.script.beats[index];
13
+ const isClosingGap = index === context.studio.beats.length - 2;
14
+ if (studioBeat.audioFile) {
15
+ const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
16
+ const padding = (() => {
17
+ if (beat.audioParams?.padding !== undefined) {
18
+ return beat.audioParams.padding;
13
19
  }
14
- else {
15
- // TODO: Remove hard-coded 0.8 and 0.3
16
- resolve(metadata.format.duration + (isLastGap ? 0.8 : 0.3));
20
+ if (index === context.studio.beats.length - 1) {
21
+ return 0;
17
22
  }
18
- });
19
- });
20
- };
21
- await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
22
- const isLastGap = index === context.studio.beats.length - 2;
23
- if (studioBeat.audioFile) {
24
- command.input(studioBeat.audioFile);
25
- command.input(isLastGap ? silentLastPath : silentPath);
26
- studioBeat.duration = await getDuration(studioBeat.audioFile, isLastGap);
23
+ return isClosingGap ? context.studio.script.audioParams.closingPadding : context.studio.script.audioParams.padding;
24
+ })();
25
+ const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
26
+ const totalPadding = await (async () => {
27
+ if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
28
+ const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
29
+ const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
30
+ if (movieDuration > audioDuration) {
31
+ return padding + (movieDuration - audioDuration);
32
+ }
33
+ }
34
+ return padding;
35
+ })();
36
+ studioBeat.duration = audioDuration + totalPadding;
37
+ if (totalPadding > 0) {
38
+ const silentId = silentIds.pop();
39
+ ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${totalPadding}[padding_${index}]`);
40
+ return [audioId, `[padding_${index}]`];
41
+ }
42
+ else {
43
+ return [audioId];
44
+ }
27
45
  }
28
46
  else {
29
- GraphAILogger.error("Missing studioBeat.audioFile:", index);
47
+ // NOTE: We come here when the text is empty and no audio property is specified.
48
+ studioBeat.duration = beat.duration ?? 1.0;
49
+ const silentId = silentIds.pop();
50
+ ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${studioBeat.duration}[silent_${index}]`);
51
+ return [`[silent_${index}]`];
30
52
  }
31
- }));
32
- await new Promise((resolve, reject) => {
33
- command
34
- .on("end", () => {
35
- resolve(0);
36
- })
37
- .on("error", (err) => {
38
- GraphAILogger.info("Error while combining MP3 files:", err);
39
- reject(err);
40
- })
41
- .mergeToFile(combinedFileName, audioDirPath);
53
+ }))).flat();
54
+ silentIds.forEach((silentId) => {
55
+ GraphAILogger.log(`Using extra silentId: ${silentId}`);
56
+ ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${0.01}[silent_extra]`);
57
+ inputIds.push("[silent_extra]");
42
58
  });
59
+ ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
60
+ await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
43
61
  return {
44
62
  studio: context.studio,
45
63
  };
@@ -2,12 +2,11 @@ import addBGMAgent from "./add_bgm_agent.js";
2
2
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
3
3
  import imageGoogleAgent from "./image_google_agent.js";
4
4
  import imageOpenaiAgent from "./image_openai_agent.js";
5
- import mulmoPromptsAgent from "./mulmo_prompts_agent.js";
6
5
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
7
6
  import ttsOpenaiAgent from "./tts_openai_agent.js";
8
- import validateMulmoScriptAgent from "./validate_mulmo_script_agent.js";
7
+ import validateSchemaAgent from "./validate_schema_agent.js";
9
8
  import { browserlessAgent } from "@graphai/browserless_agent";
10
9
  import { textInputAgent } from "@graphai/input_agents";
11
10
  import { openAIAgent } from "@graphai/openai_agent";
12
11
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
13
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, mulmoPromptsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateMulmoScriptAgent, };
12
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
@@ -2,13 +2,12 @@ import addBGMAgent from "./add_bgm_agent.js";
2
2
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
3
3
  import imageGoogleAgent from "./image_google_agent.js";
4
4
  import imageOpenaiAgent from "./image_openai_agent.js";
5
- import mulmoPromptsAgent from "./mulmo_prompts_agent.js";
6
5
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
7
6
  import ttsOpenaiAgent from "./tts_openai_agent.js";
8
- import validateMulmoScriptAgent from "./validate_mulmo_script_agent.js";
7
+ import validateSchemaAgent from "./validate_schema_agent.js";
9
8
  import { browserlessAgent } from "@graphai/browserless_agent";
10
9
  import { textInputAgent } from "@graphai/input_agents";
11
10
  import { openAIAgent } from "@graphai/openai_agent";
12
11
  // import * as vanilla from "@graphai/vanilla";
13
12
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
14
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, mulmoPromptsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateMulmoScriptAgent, };
13
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
@@ -0,0 +1,4 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ export declare const ttsGoogleAgent: AgentFunction;
3
+ declare const ttsGoogleAgentInfo: AgentFunctionInfo;
4
+ export default ttsGoogleAgentInfo;
@@ -0,0 +1,51 @@
1
+ import { GraphAILogger } from "graphai";
2
+ import * as textToSpeech from "@google-cloud/text-to-speech";
3
+ const client = new textToSpeech.TextToSpeechClient();
4
+ export const ttsGoogleAgent = async ({ namedInputs, params }) => {
5
+ const { text } = namedInputs;
6
+ const { voice, suppressError, speed } = params;
7
+ // Construct the voice request
8
+ const voiceParams = {
9
+ languageCode: "en-US", // TODO: Make this configurable
10
+ ssmlGender: "FEMALE", // TODO: Make this configurable
11
+ };
12
+ if (voice) {
13
+ voiceParams.name = voice;
14
+ }
15
+ // Construct the request
16
+ const request = {
17
+ input: { text: text },
18
+ voice: voiceParams,
19
+ audioConfig: {
20
+ audioEncoding: "MP3",
21
+ speakingRate: speed || 1.0,
22
+ },
23
+ };
24
+ try {
25
+ // Call the Text-to-Speech API
26
+ const [response] = await client.synthesizeSpeech(request);
27
+ return { buffer: response.audioContent };
28
+ }
29
+ catch (e) {
30
+ if (suppressError) {
31
+ return {
32
+ error: e,
33
+ };
34
+ }
35
+ GraphAILogger.info(e);
36
+ throw new Error("TTS Google Error");
37
+ }
38
+ };
39
+ const ttsGoogleAgentInfo = {
40
+ name: "ttsGoogleAgent",
41
+ agent: ttsGoogleAgent,
42
+ mock: ttsGoogleAgent,
43
+ samples: [],
44
+ description: "Google TTS agent",
45
+ category: ["tts"],
46
+ author: "Receptron Team",
47
+ repository: "https://github.com/receptron/graphai-agents/tree/main/tts/tts-openai-agent",
48
+ license: "MIT",
49
+ environmentVariables: ["OPENAI_API_KEY"],
50
+ };
51
+ export default ttsGoogleAgentInfo;
@@ -0,0 +1,19 @@
1
+ import type { AgentFunction, AgentFunctionInfo, DefaultConfigData } from "graphai";
2
+ import { MulmoScript } from "../types/index.js";
3
+ import { ZodSchema } from "zod";
4
+ interface ValidateMulmoScriptInputs {
5
+ text: string;
6
+ schema: ZodSchema;
7
+ }
8
+ interface ValidateMulmoScriptResponse {
9
+ isValid: boolean;
10
+ data?: MulmoScript;
11
+ error?: string;
12
+ }
13
+ /**
14
+ * Zod schema validation agent
15
+ * Validates if a JSON string conforms to the Zod schema
16
+ */
17
+ export declare const validateSchemaAgent: AgentFunction<object, ValidateMulmoScriptResponse, ValidateMulmoScriptInputs, DefaultConfigData>;
18
+ declare const validateMulmoScriptAgentInfo: AgentFunctionInfo;
19
+ export default validateMulmoScriptAgentInfo;
@@ -0,0 +1,36 @@
1
+ import assert from "node:assert";
2
+ /**
3
+ * Zod schema validation agent
4
+ * Validates if a JSON string conforms to the Zod schema
5
+ */
6
+ export const validateSchemaAgent = async ({ namedInputs, }) => {
7
+ const { text, schema } = namedInputs;
8
+ try {
9
+ assert(schema, "schema is required");
10
+ assert(text, "text is required");
11
+ const jsonData = JSON.parse(text);
12
+ const parsed = schema.parse(jsonData);
13
+ return {
14
+ isValid: true,
15
+ data: parsed,
16
+ };
17
+ }
18
+ catch (error) {
19
+ return {
20
+ isValid: false,
21
+ error: error instanceof Error ? error.message : String(error),
22
+ };
23
+ }
24
+ };
25
+ const validateMulmoScriptAgentInfo = {
26
+ name: "validateSchemaAgent",
27
+ agent: validateSchemaAgent,
28
+ mock: validateSchemaAgent,
29
+ samples: [],
30
+ description: "Validates if a JSON string conforms to the Zod schema",
31
+ category: ["validation"],
32
+ author: "Receptron Team",
33
+ repository: "https://github.com/receptron/mulmocast-cli/tree/main/src/agents/validate_schema_agent.ts",
34
+ license: "MIT",
35
+ };
36
+ export default validateMulmoScriptAgentInfo;
package/lib/cli/args.d.ts CHANGED
@@ -3,9 +3,11 @@ export declare const getArgs: () => {
3
3
  v: boolean;
4
4
  o: string | undefined;
5
5
  b: string | undefined;
6
+ l: string | undefined;
6
7
  a: string | undefined;
7
8
  i: string | undefined;
8
9
  f: boolean;
10
+ c: string | undefined;
9
11
  pdf_mode: string;
10
12
  pdf_size: string;
11
13
  _: (string | number)[];
package/lib/cli/args.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import yargs from "yargs";
2
2
  import { hideBin } from "yargs/helpers";
3
3
  import { commonOptions } from "./common.js";
4
- import { pdf_modes, pdf_sizes } from "../utils/const.js";
4
+ import { pdf_modes, pdf_sizes, languages } from "../utils/const.js";
5
5
  export const getArgs = () => {
6
6
  return commonOptions(yargs(hideBin(process.argv)))
7
7
  .scriptName("mulmo")
@@ -23,6 +23,13 @@ export const getArgs = () => {
23
23
  demandOption: false,
24
24
  default: false,
25
25
  type: "boolean",
26
+ })
27
+ .option("c", {
28
+ alias: "caption",
29
+ description: "Video captions",
30
+ choices: languages,
31
+ demandOption: false,
32
+ type: "string",
26
33
  })
27
34
  .option("pdf_mode", {
28
35
  description: "pdf mode",
@@ -40,7 +47,7 @@ export const getArgs = () => {
40
47
  return yargs
41
48
  .positional("action", {
42
49
  describe: "action to perform",
43
- choices: ["translate", "audio", "images", "movie", "pdf", "preprocess"],
50
+ choices: ["translate", "audio", "images", "movie", "pdf"],
44
51
  type: "string",
45
52
  })
46
53
  .positional("file", {
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import "dotenv/config";
3
+ export declare const main: () => Promise<void>;
package/lib/cli/bin.js ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env node
2
+ import "dotenv/config";
3
+ import yargs from "yargs/yargs";
4
+ import { hideBin } from "yargs/helpers";
5
+ import * as translateCmd from "./commands/translate/index.js";
6
+ import * as audioCmd from "./commands/audio/index.js";
7
+ import * as imagesCmd from "./commands/image/index.js";
8
+ import * as movieCmd from "./commands/movie/index.js";
9
+ import * as pdfCmd from "./commands/pdf/index.js";
10
+ import * as toolCmd from "./commands/tool/index.js";
11
+ import { GraphAILogger } from "graphai";
12
+ export const main = async () => {
13
+ const cli = yargs(hideBin(process.argv))
14
+ .scriptName("mulmo")
15
+ .usage("$0 <command> [options]")
16
+ .option("v", {
17
+ alias: "verbose",
18
+ describe: "verbose log",
19
+ demandOption: true,
20
+ default: false,
21
+ type: "boolean",
22
+ })
23
+ .command(translateCmd)
24
+ .command(audioCmd)
25
+ .command(imagesCmd)
26
+ .command(movieCmd)
27
+ .command(pdfCmd)
28
+ .command(toolCmd)
29
+ .demandCommand()
30
+ .strict()
31
+ .help()
32
+ .alias("help", "h");
33
+ await cli.parseAsync();
34
+ };
35
+ main().catch((error) => {
36
+ GraphAILogger.info("An unexpected error occurred:", error);
37
+ process.exit(1);
38
+ });