mulmocast 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +294 -39
  2. package/assets/audio/silent60sec.mp3 +0 -0
  3. package/assets/html/caption.html +45 -0
  4. package/assets/html/chart.html +1 -1
  5. package/assets/html/mermaid.html +6 -2
  6. package/assets/html/tailwind.html +13 -0
  7. package/assets/templates/business.json +2 -128
  8. package/assets/templates/children_book.json +1 -128
  9. package/assets/templates/coding.json +2 -136
  10. package/assets/templates/comic_strips.json +6 -0
  11. package/assets/templates/ghibli_strips.json +6 -0
  12. package/assets/templates/sensei_and_taro.json +1 -118
  13. package/lib/actions/audio.js +62 -39
  14. package/lib/actions/captions.d.ts +2 -0
  15. package/lib/actions/captions.js +75 -0
  16. package/lib/actions/images.js +34 -13
  17. package/lib/actions/index.d.ts +1 -0
  18. package/lib/actions/index.js +1 -0
  19. package/lib/actions/movie.js +102 -101
  20. package/lib/actions/pdf.js +26 -6
  21. package/lib/actions/translate.js +60 -39
  22. package/lib/agents/add_bgm_agent.js +15 -39
  23. package/lib/agents/combine_audio_files_agent.js +53 -35
  24. package/lib/agents/index.d.ts +2 -3
  25. package/lib/agents/index.js +2 -3
  26. package/lib/agents/tts_google_agent.d.ts +4 -0
  27. package/lib/agents/tts_google_agent.js +51 -0
  28. package/lib/agents/validate_schema_agent.d.ts +19 -0
  29. package/lib/agents/validate_schema_agent.js +36 -0
  30. package/lib/cli/args.d.ts +2 -0
  31. package/lib/cli/args.js +9 -2
  32. package/lib/cli/bin.d.ts +3 -0
  33. package/lib/cli/bin.js +38 -0
  34. package/lib/cli/cli.js +34 -7
  35. package/lib/cli/commands/audio/builder.d.ts +14 -0
  36. package/lib/cli/commands/audio/builder.js +6 -0
  37. package/lib/cli/commands/audio/handler.d.ts +4 -0
  38. package/lib/cli/commands/audio/handler.js +7 -0
  39. package/lib/cli/commands/audio/index.d.ts +4 -0
  40. package/lib/cli/commands/audio/index.js +4 -0
  41. package/lib/cli/commands/image/builder.d.ts +14 -0
  42. package/lib/cli/commands/image/builder.js +6 -0
  43. package/lib/cli/commands/image/handler.d.ts +4 -0
  44. package/lib/cli/commands/image/handler.js +7 -0
  45. package/lib/cli/commands/image/index.d.ts +4 -0
  46. package/lib/cli/commands/image/index.js +4 -0
  47. package/lib/cli/commands/movie/builder.d.ts +18 -0
  48. package/lib/cli/commands/movie/builder.js +19 -0
  49. package/lib/cli/commands/movie/handler.d.ts +6 -0
  50. package/lib/cli/commands/movie/handler.js +12 -0
  51. package/lib/cli/commands/movie/index.d.ts +4 -0
  52. package/lib/cli/commands/movie/index.js +4 -0
  53. package/lib/cli/commands/pdf/builder.d.ts +18 -0
  54. package/lib/cli/commands/pdf/builder.js +19 -0
  55. package/lib/cli/commands/pdf/handler.d.ts +6 -0
  56. package/lib/cli/commands/pdf/handler.js +8 -0
  57. package/lib/cli/commands/pdf/index.d.ts +4 -0
  58. package/lib/cli/commands/pdf/index.js +4 -0
  59. package/lib/cli/commands/tool/index.d.ts +6 -0
  60. package/lib/cli/commands/tool/index.js +8 -0
  61. package/lib/cli/commands/tool/prompt/builder.d.ts +4 -0
  62. package/lib/cli/commands/tool/prompt/builder.js +11 -0
  63. package/lib/cli/commands/tool/prompt/handler.d.ts +4 -0
  64. package/lib/cli/commands/tool/prompt/handler.js +14 -0
  65. package/lib/cli/commands/tool/prompt/index.d.ts +4 -0
  66. package/lib/cli/commands/tool/prompt/index.js +4 -0
  67. package/lib/cli/commands/tool/schema/builder.d.ts +2 -0
  68. package/lib/cli/commands/tool/schema/builder.js +3 -0
  69. package/lib/cli/commands/tool/schema/handler.d.ts +2 -0
  70. package/lib/cli/commands/tool/schema/handler.js +12 -0
  71. package/lib/cli/commands/tool/schema/index.d.ts +4 -0
  72. package/lib/cli/commands/tool/schema/index.js +4 -0
  73. package/lib/cli/commands/tool/scripting/builder.d.ts +20 -0
  74. package/lib/cli/commands/tool/scripting/builder.js +63 -0
  75. package/lib/cli/commands/tool/scripting/handler.d.ts +13 -0
  76. package/lib/cli/commands/tool/scripting/handler.js +36 -0
  77. package/lib/cli/commands/tool/scripting/index.d.ts +4 -0
  78. package/lib/cli/commands/tool/scripting/index.js +4 -0
  79. package/lib/cli/commands/tool/story_to_script/builder.d.ts +20 -0
  80. package/lib/cli/commands/tool/story_to_script/builder.js +61 -0
  81. package/lib/cli/commands/tool/story_to_script/handler.d.ts +13 -0
  82. package/lib/cli/commands/tool/story_to_script/handler.js +36 -0
  83. package/lib/cli/commands/tool/story_to_script/index.d.ts +4 -0
  84. package/lib/cli/commands/tool/story_to_script/index.js +4 -0
  85. package/lib/cli/commands/translate/builder.d.ts +14 -0
  86. package/lib/cli/commands/translate/builder.js +5 -0
  87. package/lib/cli/commands/translate/handler.d.ts +4 -0
  88. package/lib/cli/commands/translate/handler.js +6 -0
  89. package/lib/cli/commands/translate/index.d.ts +4 -0
  90. package/lib/cli/commands/translate/index.js +4 -0
  91. package/lib/cli/common.d.ts +6 -2
  92. package/lib/cli/common.js +18 -7
  93. package/lib/cli/helpers.d.ts +38 -0
  94. package/lib/cli/helpers.js +115 -0
  95. package/lib/cli/tool-args.d.ts +1 -0
  96. package/lib/cli/tool-args.js +1 -1
  97. package/lib/cli/tool-cli.js +8 -0
  98. package/lib/methods/mulmo_script.d.ts +0 -1
  99. package/lib/methods/mulmo_script.js +4 -7
  100. package/lib/methods/mulmo_script_template.d.ts +2 -2
  101. package/lib/methods/mulmo_script_template.js +3 -13
  102. package/lib/methods/mulmo_studio.d.ts +8 -0
  103. package/lib/methods/mulmo_studio.js +24 -0
  104. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
  105. package/lib/tools/create_mulmo_script_from_url.js +43 -14
  106. package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
  107. package/lib/tools/create_mulmo_script_interactively.js +21 -20
  108. package/lib/tools/dump_prompt.js +2 -0
  109. package/lib/tools/story_to_script.d.ts +12 -0
  110. package/lib/tools/story_to_script.js +275 -0
  111. package/lib/types/cli_types.d.ts +14 -0
  112. package/lib/types/cli_types.js +1 -0
  113. package/lib/types/schema.d.ts +637 -1766
  114. package/lib/types/schema.js +77 -8
  115. package/lib/types/type.d.ts +10 -3
  116. package/lib/utils/const.d.ts +5 -0
  117. package/lib/utils/const.js +5 -0
  118. package/lib/utils/ffmpeg_utils.d.ts +12 -0
  119. package/lib/utils/ffmpeg_utils.js +63 -0
  120. package/lib/utils/file.d.ts +8 -3
  121. package/lib/utils/file.js +40 -9
  122. package/lib/utils/filters.js +16 -11
  123. package/lib/utils/image_plugins/chart.js +6 -1
  124. package/lib/utils/image_plugins/html_tailwind.d.ts +3 -0
  125. package/lib/utils/image_plugins/html_tailwind.js +18 -0
  126. package/lib/utils/image_plugins/index.d.ts +2 -1
  127. package/lib/utils/image_plugins/index.js +2 -1
  128. package/lib/utils/image_plugins/mermaid.js +1 -1
  129. package/lib/utils/image_plugins/tailwind.d.ts +3 -0
  130. package/lib/utils/image_plugins/tailwind.js +18 -0
  131. package/lib/utils/image_plugins/text_slide.js +9 -2
  132. package/lib/utils/markdown.d.ts +1 -1
  133. package/lib/utils/markdown.js +8 -4
  134. package/lib/utils/preprocess.d.ts +40 -10
  135. package/lib/utils/preprocess.js +7 -2
  136. package/lib/utils/prompt.d.ts +16 -0
  137. package/lib/utils/prompt.js +74 -0
  138. package/lib/utils/utils.d.ts +10 -5
  139. package/lib/utils/utils.js +37 -17
  140. package/package.json +27 -23
@@ -4,7 +4,7 @@ const URLStringSchema = z.string().url();
4
4
  export const localizedTextSchema = z
5
5
  .object({
6
6
  text: z.string(),
7
- lang: z.string(),
7
+ lang: langSchema,
8
8
  // caption: z.string(),
9
9
  texts: z.array(z.string()).optional(),
10
10
  ttsTexts: z.array(z.string()).optional(),
@@ -77,7 +77,8 @@ export const mulmoTextSlideMediaSchema = z
77
77
  type: z.literal("textSlide"),
78
78
  slide: z.object({
79
79
  title: z.string(),
80
- bullets: z.array(z.string()),
80
+ subtitle: z.string().optional(),
81
+ bullets: z.array(z.string()).optional(),
81
82
  }),
82
83
  })
83
84
  .strict();
@@ -96,6 +97,12 @@ export const mulmoMermaidMediaSchema = z
96
97
  appendix: z.array(z.string()).optional().describe("The appendix of the mermaid diagram; typically, style information."),
97
98
  })
98
99
  .strict();
100
+ export const mulmoHtmlTailwindMediaSchema = z
101
+ .object({
102
+ type: z.literal("html_tailwind"),
103
+ html: stringOrStringArray,
104
+ })
105
+ .strict();
99
106
  export const mulmoImageAssetSchema = z.union([
100
107
  mulmoMarkdownMediaSchema,
101
108
  mulmoWebMediaSchema,
@@ -106,6 +113,7 @@ export const mulmoImageAssetSchema = z.union([
106
113
  mulmoTextSlideMediaSchema,
107
114
  mulmoChartMediaSchema,
108
115
  mulmoMermaidMediaSchema,
116
+ mulmoHtmlTailwindMediaSchema,
109
117
  ]);
110
118
  const mulmoAudioMediaSchema = z
111
119
  .object({
@@ -133,18 +141,36 @@ export const textSlideParamsSchema = z
133
141
  cssStyles: stringOrStringArray,
134
142
  })
135
143
  .strict();
144
+ /* TODO: Add something later
136
145
  export const videoParamsSchema = z
137
- .object({
146
+ .object({
138
147
  padding: z.number().optional(), // msec
148
+ })
149
+ .strict();
150
+ */
151
+ export const beatAudioParamsSchema = z
152
+ .object({
153
+ padding: z.number().optional().describe("Padding between beats"), // seconds
154
+ })
155
+ .strict();
156
+ // Note: we can't extend beatAudioParamsSchema because it has padding as optional
157
+ export const audioParamsSchema = z
158
+ .object({
159
+ padding: z.number().describe("Padding between beats"), // seconds
160
+ introPadding: z.number().describe("Padding at the beginning of the audio"), // seconds
161
+ closingPadding: z.number().describe("Padding before the last beat"), // seconds
162
+ outroPadding: z.number().describe("Padding at the end of the audio"), // seconds
139
163
  })
140
164
  .strict();
141
165
  export const mulmoBeatSchema = z
142
166
  .object({
143
167
  speaker: speakerIdSchema.default("Presenter"),
144
- text: z.string(),
168
+ text: z.string().describe("Text to be spoken. If empty, the audio is not generated."),
145
169
  image: mulmoImageAssetSchema.optional(),
146
170
  audio: mulmoAudioAssetSchema.optional(),
171
+ duration: z.number().optional().describe("Duration of the beat. Used only when the text is empty"),
147
172
  imageParams: mulmoImageParamsSchema.optional(), // beat specific parameters
173
+ audioParams: beatAudioParamsSchema.optional(), // beat specific parameters
148
174
  speechOptions: speechOptionsSchema.optional(),
149
175
  textSlideParams: textSlideParamsSchema.optional(),
150
176
  imagePrompt: z.string().optional(), // specified or inserted by preprocessor
@@ -163,7 +189,7 @@ export const mulmoCastCreditSchema = z
163
189
  credit: z.literal("closing").optional(),
164
190
  })
165
191
  .strict();
166
- export const text2SpeechProviderSchema = z.union([z.literal("openai"), z.literal("nijivoice")]).default("openai");
192
+ export const text2SpeechProviderSchema = z.union([z.literal("openai"), z.literal("nijivoice"), z.literal("google")]).default("openai");
167
193
  export const mulmoSpeechParamsSchema = z
168
194
  .object({
169
195
  provider: text2SpeechProviderSchema, // has default value
@@ -191,7 +217,13 @@ export const mulmoPresentationStyleSchema = z.object({
191
217
  .optional(),
192
218
  // for textSlides
193
219
  textSlideParams: textSlideParamsSchema.optional(),
194
- videoParams: videoParamsSchema.optional(),
220
+ // videoParams: videoParamsSchema.optional(),
221
+ audioParams: audioParamsSchema.default({
222
+ introPadding: 1.0,
223
+ padding: 0.3,
224
+ closingPadding: 0.8,
225
+ outroPadding: 1.0,
226
+ }),
195
227
  // TODO: Switch to showCaptions later
196
228
  omitCaptions: z.boolean().optional(), // default is false
197
229
  });
@@ -216,18 +248,55 @@ export const mulmoScriptSchema = mulmoPresentationStyleSchema
216
248
  .strict();
217
249
  export const mulmoStudioBeatSchema = z
218
250
  .object({
219
- multiLingualTexts: multiLingualTextsSchema.optional(),
220
251
  hash: z.string().optional(),
221
252
  duration: z.number().optional(),
222
253
  audioFile: z.string().optional(),
223
254
  imageFile: z.string().optional(), // path to the image
255
+ captionFile: z.string().optional(), // path to the caption image
224
256
  })
225
257
  .strict();
258
+ export const mulmoStudioMultiLingualDataSchema = z.object({
259
+ multiLingualTexts: multiLingualTextsSchema,
260
+ });
261
+ export const mulmoStudioMultiLingualSchema = z.array(mulmoStudioMultiLingualDataSchema).min(1);
262
+ export const mulmoSessionStateSchema = z.object({
263
+ inSession: z.object({
264
+ audio: z.boolean(),
265
+ image: z.boolean(),
266
+ video: z.boolean(),
267
+ multiLingual: z.boolean(),
268
+ caption: z.boolean(),
269
+ pdf: z.boolean(),
270
+ }),
271
+ inBeatSession: z.object({
272
+ audio: z.set(z.number()),
273
+ image: z.set(z.number()),
274
+ multiLingual: z.set(z.number()),
275
+ caption: z.set(z.number()),
276
+ }),
277
+ });
226
278
  export const mulmoStudioSchema = z
227
279
  .object({
228
280
  script: mulmoScriptSchema,
229
281
  filename: z.string(),
230
282
  beats: z.array(mulmoStudioBeatSchema).min(1),
283
+ multiLingual: mulmoStudioMultiLingualSchema,
284
+ state: mulmoSessionStateSchema.default({
285
+ inSession: {
286
+ audio: false,
287
+ image: false,
288
+ video: false,
289
+ multiLingual: false,
290
+ caption: false,
291
+ pdf: false,
292
+ },
293
+ inBeatSession: {
294
+ audio: new Set(),
295
+ image: new Set(),
296
+ multiLingual: new Set(),
297
+ caption: new Set(),
298
+ },
299
+ }),
231
300
  })
232
301
  .strict();
233
302
  export const mulmoScriptTemplateSchema = z
@@ -235,7 +304,7 @@ export const mulmoScriptTemplateSchema = z
235
304
  title: z.string(),
236
305
  description: z.string(),
237
306
  systemPrompt: z.string(),
238
- script: mulmoScriptSchema.optional(),
307
+ scriptName: z.string().optional(),
239
308
  })
240
309
  .strict();
241
310
  export const mulmoStoryboardSceneSchema = z
@@ -1,5 +1,6 @@
1
- import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, speakerDictionarySchema, mulmoImageParamsSchema, mulmoSpeechParamsSchema, textSlideParamsSchema, speechOptionsSchema, mulmoCanvasDimensionSchema, mulmoScriptTemplateSchema, text2ImageProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema } from "./schema.js";
2
- import { pdf_modes, pdf_sizes } from "../utils/const.js";
1
+ import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualDataSchema, speakerDictionarySchema, mulmoImageParamsSchema, mulmoSpeechParamsSchema, textSlideParamsSchema, speechOptionsSchema, mulmoCanvasDimensionSchema, mulmoScriptTemplateSchema, text2ImageProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema } from "./schema.js";
2
+ import { pdf_modes, pdf_sizes, storyToScriptGenerateMode } from "../utils/const.js";
3
+ import { LLM } from "../utils/utils.js";
3
4
  import { z } from "zod";
4
5
  export type LANG = z.infer<typeof langSchema>;
5
6
  export type MulmoBeat = z.infer<typeof mulmoBeatSchema>;
@@ -20,6 +21,9 @@ export type MulmoStudioBeat = z.infer<typeof mulmoStudioBeatSchema>;
20
21
  export type MulmoMediaSource = z.infer<typeof mediaSourceSchema>;
21
22
  export type MulmoStudio = z.infer<typeof mulmoStudioSchema>;
22
23
  export type MulmoScriptTemplate = z.infer<typeof mulmoScriptTemplateSchema>;
24
+ export type MulmoStudioMultiLingual = z.infer<typeof mulmoStudioMultiLingualSchema>;
25
+ export type MulmoStudioMultiLingualData = z.infer<typeof mulmoStudioMultiLingualDataSchema>;
26
+ export type MultiLingualTexts = z.infer<typeof multiLingualTextsSchema>;
23
27
  export type MulmoTextSlideMedia = z.infer<typeof mulmoTextSlideMediaSchema>;
24
28
  export type MulmoMarkdownMedia = z.infer<typeof mulmoMarkdownMediaSchema>;
25
29
  export type MulmoImageMedia = z.infer<typeof mulmoImageMediaSchema>;
@@ -36,7 +40,9 @@ export type FileDirs = {
36
40
  export type MulmoStudioContext = {
37
41
  fileDirs: FileDirs;
38
42
  studio: MulmoStudio;
43
+ lang?: string;
39
44
  force: boolean;
45
+ caption?: string;
40
46
  };
41
47
  export type ScriptingParams = {
42
48
  urls: string[];
@@ -45,7 +51,7 @@ export type ScriptingParams = {
45
51
  templateName: string;
46
52
  filename: string;
47
53
  llm_model?: string;
48
- llm_agent?: string;
54
+ llm?: LLM;
49
55
  };
50
56
  export type ImageProcessorParams = {
51
57
  beat: MulmoBeat;
@@ -62,3 +68,4 @@ export type Text2ImageAgentInfo = {
62
68
  imageParams: MulmoImageParams;
63
69
  };
64
70
  export type BeatMediaType = "movie" | "image";
71
+ export type StoryToScriptGenerateMode = (typeof storyToScriptGenerateMode)[keyof typeof storyToScriptGenerateMode];
@@ -4,3 +4,8 @@ export declare const imageDirName = "images";
4
4
  export declare const cacheDirName = "cache";
5
5
  export declare const pdf_modes: string[];
6
6
  export declare const pdf_sizes: string[];
7
+ export declare const languages: string[];
8
+ export declare const storyToScriptGenerateMode: {
9
+ stepWise: string;
10
+ oneStep: string;
11
+ };
@@ -4,3 +4,8 @@ export const imageDirName = "images";
4
4
  export const cacheDirName = "cache";
5
5
  export const pdf_modes = ["slide", "talk", "handout"];
6
6
  export const pdf_sizes = ["letter", "a4"];
7
+ export const languages = ["en", "ja"];
8
+ export const storyToScriptGenerateMode = {
9
+ stepWise: "step_wise",
10
+ oneStep: "one_step",
11
+ };
@@ -0,0 +1,12 @@
1
+ import ffmpeg from "fluent-ffmpeg";
2
+ export type FfmpegContext = {
3
+ command: ffmpeg.FfmpegCommand;
4
+ inputCount: number;
5
+ filterComplex: string[];
6
+ };
7
+ export declare const FfmpegContextInit: () => FfmpegContext;
8
+ export declare const FfmpegContextAddInput: (context: FfmpegContext, input: string) => number;
9
+ export declare const FfmpegContextPushFormattedAudio: (context: FfmpegContext, sourceId: string, outputId: string, duration?: number | undefined) => void;
10
+ export declare const FfmpegContextInputFormattedAudio: (context: FfmpegContext, input: string, duration?: number | undefined) => string;
11
+ export declare const FfmpegContextGenerateOutput: (context: FfmpegContext, output: string, options?: string[]) => Promise<number>;
12
+ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<number>;
@@ -0,0 +1,63 @@
1
+ import ffmpeg from "fluent-ffmpeg";
2
+ import { GraphAILogger } from "graphai";
3
+ export const FfmpegContextInit = () => {
4
+ return {
5
+ command: ffmpeg(),
6
+ inputCount: 0,
7
+ filterComplex: [],
8
+ };
9
+ };
10
+ export const FfmpegContextAddInput = (context, input) => {
11
+ context.command.input(input);
12
+ context.inputCount++;
13
+ return context.inputCount - 1; // returned the index of the input
14
+ };
15
+ export const FfmpegContextPushFormattedAudio = (context, sourceId, outputId, duration = undefined) => {
16
+ if (duration !== undefined) {
17
+ context.filterComplex.push(`${sourceId}atrim=duration=${duration},aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo${outputId}`);
18
+ }
19
+ else {
20
+ context.filterComplex.push(`${sourceId}aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo${outputId}`);
21
+ }
22
+ };
23
+ export const FfmpegContextInputFormattedAudio = (context, input, duration = undefined) => {
24
+ const index = FfmpegContextAddInput(context, input);
25
+ const audioId = `[a${index}]`;
26
+ FfmpegContextPushFormattedAudio(context, `[${index}:a]`, audioId, duration);
27
+ return audioId;
28
+ };
29
+ export const FfmpegContextGenerateOutput = (context, output, options = []) => {
30
+ return new Promise((resolve, reject) => {
31
+ context.command
32
+ .complexFilter(context.filterComplex)
33
+ .outputOptions(options)
34
+ .output(output)
35
+ .on("start", (__cmdLine) => {
36
+ GraphAILogger.log("Started FFmpeg ..."); // with command:', cmdLine);
37
+ })
38
+ .on("error", (err, stdout, stderr) => {
39
+ GraphAILogger.error("Error occurred:", err);
40
+ GraphAILogger.error("FFmpeg stdout:", stdout);
41
+ GraphAILogger.error("FFmpeg stderr:", stderr);
42
+ GraphAILogger.info("Video/Audio creation failed. An unexpected error occurred.");
43
+ reject();
44
+ })
45
+ .on("end", () => {
46
+ resolve(0);
47
+ })
48
+ .run();
49
+ });
50
+ };
51
+ export const ffmpegGetMediaDuration = (filePath) => {
52
+ return new Promise((resolve, reject) => {
53
+ ffmpeg.ffprobe(filePath, (err, metadata) => {
54
+ if (err) {
55
+ GraphAILogger.info("Error while getting metadata:", err);
56
+ reject(err);
57
+ }
58
+ else {
59
+ resolve(metadata.format.duration);
60
+ }
61
+ });
62
+ });
63
+ };
@@ -1,5 +1,6 @@
1
1
  import { MulmoScript, MulmoScriptTemplate, MulmoMediaSource, MulmoStudioContext } from "../types/index.js";
2
2
  import { PDFMode } from "../types/index.js";
3
+ import { ZodSchema } from "zod";
3
4
  export declare function readMulmoScriptFile<T = MulmoScript>(path: string, errorMessage: string): {
4
5
  mulmoData: T;
5
6
  mulmoDataPath: string;
@@ -24,23 +25,27 @@ export declare const fetchMulmoScriptFile: (url: string) => Promise<{
24
25
  script?: undefined;
25
26
  }>;
26
27
  export declare const getOutputStudioFilePath: (outDirPath: string, fileName: string) => string;
28
+ export declare const resolveDirPath: (dirPath: string, studioFileName: string) => string;
27
29
  export declare const getAudioSegmentDirPath: (audioDirPath: string, studioFileName: string) => string;
28
30
  export declare const getAudioSegmentFilePath: (audioDirPath: string, studioFileName: string, fileName: string) => string;
29
- export declare const getAudioCombinedFilePath: (audioDirPath: string, fileName: string) => string;
31
+ export declare const getAudioCombinedFilePath: (audioDirPath: string, fileName: string, lang?: string) => string;
30
32
  export declare const getAudioArtifactFilePath: (outDirPath: string, fileName: string) => string;
31
- export declare const getOutputVideoFilePath: (outDirPath: string, fileName: string) => string;
32
- export declare const getOutputPdfFilePath: (outDirPath: string, fileName: string, pdfMode: PDFMode) => string;
33
+ export declare const getOutputVideoFilePath: (outDirPath: string, fileName: string, lang?: string, caption?: string) => string;
34
+ export declare const getOutputPdfFilePath: (outDirPath: string, fileName: string, pdfMode: PDFMode, lang?: string) => string;
33
35
  export declare const getTemplateFilePath: (templateName: string) => string;
34
36
  export declare const mkdir: (dirPath: string) => void;
35
37
  export declare const silentPath: string;
36
38
  export declare const silentLastPath: string;
39
+ export declare const silent60secPath: string;
37
40
  export declare const defaultBGMPath: string;
38
41
  export declare const getHTMLFile: (filename: string) => string;
39
42
  export declare const getBaseDirPath: (basedir?: string) => string;
40
43
  export declare const getFullPath: (baseDirPath: string | undefined, file: string) => string;
44
+ export declare const readScriptTemplateFile: (scriptName: string) => any;
41
45
  export declare const readTemplatePrompt: (templateName: string) => string;
42
46
  export declare const getAvailableTemplates: () => (MulmoScriptTemplate & {
43
47
  filename: string;
44
48
  })[];
45
49
  export declare const writingMessage: (filePath: string) => void;
46
50
  export declare const resolveMediaSource: (source: MulmoMediaSource, context: MulmoStudioContext) => string | null;
51
+ export declare const readAndParseJson: <S extends ZodSchema<any>>(filePath: string, schema: S) => ReturnType<S["parse"]>;
package/lib/utils/file.js CHANGED
@@ -45,23 +45,35 @@ export const fetchMulmoScriptFile = async (url) => {
45
45
  export const getOutputStudioFilePath = (outDirPath, fileName) => {
46
46
  return path.resolve(outDirPath, fileName + "_studio.json");
47
47
  };
48
+ export const resolveDirPath = (dirPath, studioFileName) => {
49
+ return path.resolve(dirPath, studioFileName);
50
+ };
51
+ // TODO: probably better to just use resolveDirPath instead.
48
52
  export const getAudioSegmentDirPath = (audioDirPath, studioFileName) => {
49
53
  return path.resolve(audioDirPath, studioFileName);
50
54
  };
51
55
  export const getAudioSegmentFilePath = (audioDirPath, studioFileName, fileName) => {
52
56
  return path.resolve(getAudioSegmentDirPath(audioDirPath, studioFileName), fileName + ".mp3");
53
57
  };
54
- export const getAudioCombinedFilePath = (audioDirPath, fileName) => {
58
+ export const getAudioCombinedFilePath = (audioDirPath, fileName, lang) => {
59
+ if (lang) {
60
+ return path.resolve(audioDirPath, fileName, `${fileName}_${lang}.mp3`);
61
+ }
55
62
  return path.resolve(audioDirPath, fileName, fileName + ".mp3");
56
63
  };
57
64
  export const getAudioArtifactFilePath = (outDirPath, fileName) => {
58
65
  return path.resolve(outDirPath, fileName + ".mp3");
59
66
  };
60
- export const getOutputVideoFilePath = (outDirPath, fileName) => {
61
- return path.resolve(outDirPath, fileName + ".mp4");
67
+ export const getOutputVideoFilePath = (outDirPath, fileName, lang, caption) => {
68
+ const suffix = lang ? `_${lang}` : "";
69
+ const suffix2 = caption ? `__${caption}` : "";
70
+ return path.resolve(outDirPath, `${fileName}${suffix}${suffix2}.mp4`);
62
71
  };
63
- export const getOutputPdfFilePath = (outDirPath, fileName, pdfMode) => {
64
- return path.resolve(outDirPath, fileName + "_" + pdfMode + ".pdf");
72
+ export const getOutputPdfFilePath = (outDirPath, fileName, pdfMode, lang) => {
73
+ if (lang) {
74
+ return path.resolve(outDirPath, `${fileName}_${pdfMode}_${lang}.pdf`);
75
+ }
76
+ return path.resolve(outDirPath, `${fileName}_${pdfMode}.pdf`);
65
77
  };
66
78
  export const getTemplateFilePath = (templateName) => {
67
79
  return path.resolve(__dirname, "../../assets/templates/" + templateName + ".json");
@@ -74,6 +86,7 @@ export const mkdir = (dirPath) => {
74
86
  };
75
87
  export const silentPath = path.resolve(__dirname, "../../assets/audio/silent300.mp3");
76
88
  export const silentLastPath = path.resolve(__dirname, "../../assets/audio/silent800.mp3");
89
+ export const silent60secPath = path.resolve(__dirname, "../../assets/audio/silent60sec.mp3");
77
90
  export const defaultBGMPath = path.resolve(__dirname, "../../assets/music/StarsBeyondEx.mp3");
78
91
  export const getHTMLFile = (filename) => {
79
92
  const htmlPath = path.resolve(__dirname, `../../assets/html/${filename}.html`);
@@ -98,11 +111,23 @@ export const getFullPath = (baseDirPath, file) => {
98
111
  }
99
112
  return path.resolve(file);
100
113
  };
114
+ export const readScriptTemplateFile = (scriptName) => {
115
+ const scriptPath = path.resolve(__dirname, "../../scripts/templates", scriptName);
116
+ const scriptData = fs.readFileSync(scriptPath, "utf-8");
117
+ // NOTE: We don't want to schema parse the script here to eliminate default values.
118
+ return JSON.parse(scriptData);
119
+ };
101
120
  export const readTemplatePrompt = (templateName) => {
102
121
  const templatePath = getTemplateFilePath(templateName);
103
- const scriptData = fs.readFileSync(templatePath, "utf-8");
104
- const template = mulmoScriptTemplateSchema.parse(JSON.parse(scriptData));
105
- const prompt = MulmoScriptTemplateMethods.getSystemPrompt(template);
122
+ const templateData = fs.readFileSync(templatePath, "utf-8");
123
+ const template = mulmoScriptTemplateSchema.parse(JSON.parse(templateData));
124
+ const script = (() => {
125
+ if (template.scriptName) {
126
+ return readScriptTemplateFile(template.scriptName);
127
+ }
128
+ return undefined;
129
+ })();
130
+ const prompt = MulmoScriptTemplateMethods.getSystemPrompt(template, script);
106
131
  return prompt;
107
132
  };
108
133
  export const getAvailableTemplates = () => {
@@ -120,7 +145,7 @@ export const getAvailableTemplates = () => {
120
145
  });
121
146
  };
122
147
  export const writingMessage = (filePath) => {
123
- GraphAILogger.info(`writing: ${filePath}`);
148
+ GraphAILogger.debug(`writing: ${filePath}`);
124
149
  };
125
150
  export const resolveMediaSource = (source, context) => {
126
151
  if (source.kind === "path") {
@@ -131,3 +156,9 @@ export const resolveMediaSource = (source, context) => {
131
156
  }
132
157
  return null;
133
158
  };
159
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
160
+ export const readAndParseJson = (filePath, schema) => {
161
+ const fileContent = fs.readFileSync(filePath, "utf-8");
162
+ const json = JSON.parse(fileContent);
163
+ return schema.parse(json);
164
+ };
@@ -5,9 +5,10 @@ import fsPromise from "fs/promises";
5
5
  import { GraphAILogger } from "graphai";
6
6
  import { writingMessage } from "./file.js";
7
7
  import { text2hash } from "./utils.js";
8
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
8
9
  export const fileCacheAgentFilter = async (context, next) => {
9
10
  const { namedInputs } = context;
10
- const { file, text, force } = namedInputs;
11
+ const { file, force, studio, index, sessionType } = namedInputs;
11
12
  const shouldUseCache = async () => {
12
13
  if (force) {
13
14
  return false;
@@ -21,19 +22,23 @@ export const fileCacheAgentFilter = async (context, next) => {
21
22
  }
22
23
  };
23
24
  if (await shouldUseCache()) {
24
- const elements = file.split("/");
25
- GraphAILogger.info("cache hit: " + elements[elements.length - 1], text.slice(0, 10));
26
25
  return true;
27
26
  }
28
- const output = (await next(context));
29
- const buffer = output ? output["buffer"] : undefined;
30
- if (buffer) {
31
- writingMessage(file);
32
- await fsPromise.writeFile(file, buffer);
33
- return true;
27
+ try {
28
+ MulmoStudioMethods.setBeatSessionState(studio, sessionType, index, true);
29
+ const output = (await next(context));
30
+ const buffer = output ? output["buffer"] : undefined;
31
+ if (buffer) {
32
+ writingMessage(file);
33
+ await fsPromise.writeFile(file, buffer);
34
+ return true;
35
+ }
36
+ GraphAILogger.log("no cache, no buffer: " + file);
37
+ return false;
38
+ }
39
+ finally {
40
+ MulmoStudioMethods.setBeatSessionState(studio, sessionType, index, false);
34
41
  }
35
- GraphAILogger.log("no cache, no buffer: " + file);
36
- return false;
37
42
  };
38
43
  export const browserlessCacheGenerator = (cacheDir) => {
39
44
  const browserlessCache = async (context, next) => {
@@ -5,11 +5,16 @@ const processChart = async (params) => {
5
5
  const { beat, imagePath, canvasSize, textSlideStyle } = params;
6
6
  if (!beat.image || beat.image.type !== imageType)
7
7
  return;
8
+ const isCircular = beat.image.chartData.type === "pie" ||
9
+ beat.image.chartData.type === "doughnut" ||
10
+ beat.image.chartData.type === "polarArea" ||
11
+ beat.image.chartData.type === "radar";
12
+ const chart_width = isCircular ? Math.min(canvasSize.width, canvasSize.height) * 0.75 : canvasSize.width * 0.75;
8
13
  const template = getHTMLFile("chart");
9
14
  const htmlData = interpolate(template, {
10
15
  title: beat.image.title,
11
16
  style: textSlideStyle,
12
- width: Math.round(canvasSize.width * 0.625).toString(),
17
+ chart_width: chart_width.toString(),
13
18
  chart_data: JSON.stringify(beat.image.chartData),
14
19
  });
15
20
  await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height);
@@ -0,0 +1,3 @@
1
+ import { ImageProcessorParams } from "../../types/index.js";
2
+ export declare const imageType = "html_tailwind";
3
+ export declare const process: (params: ImageProcessorParams) => Promise<string | undefined>;
@@ -0,0 +1,18 @@
1
+ import { getHTMLFile } from "../file.js";
2
+ import { renderHTMLToImage, interpolate } from "../markdown.js";
3
+ export const imageType = "html_tailwind";
4
+ const processHtmlTailwind = async (params) => {
5
+ const { beat, imagePath, canvasSize } = params;
6
+ if (!beat.image || beat.image.type !== imageType)
7
+ return;
8
+ const html = Array.isArray(beat.image.html) ? beat.image.html.join("\n") : beat.image.html;
9
+ const template = getHTMLFile("tailwind");
10
+ const htmlData = interpolate(template, {
11
+ // style: textSlideStyle,
12
+ // width: Math.round(canvasSize.width * 0.625).toString(),
13
+ html_body: html,
14
+ });
15
+ await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height);
16
+ return imagePath;
17
+ };
18
+ export const process = processHtmlTailwind;
@@ -4,4 +4,5 @@ import * as pluginImage from "./image.js";
4
4
  import * as pluginChart from "./chart.js";
5
5
  import * as pluginMermaid from "./mermaid.js";
6
6
  import * as pluginMovie from "./movie.js";
7
- export declare const imagePlugins: (typeof pluginTextSlide | typeof pluginMarkdown | typeof pluginImage | typeof pluginChart | typeof pluginMermaid | typeof pluginMovie)[];
7
+ import * as pluginHtmlTailwind from "./html_tailwind.js";
8
+ export declare const imagePlugins: (typeof pluginTextSlide | typeof pluginMarkdown | typeof pluginImage | typeof pluginChart | typeof pluginMermaid | typeof pluginMovie | typeof pluginHtmlTailwind)[];
@@ -4,4 +4,5 @@ import * as pluginImage from "./image.js";
4
4
  import * as pluginChart from "./chart.js";
5
5
  import * as pluginMermaid from "./mermaid.js";
6
6
  import * as pluginMovie from "./movie.js";
7
- export const imagePlugins = [pluginTextSlide, pluginMarkdown, pluginImage, pluginChart, pluginMermaid, pluginMovie];
7
+ import * as pluginHtmlTailwind from "./html_tailwind.js";
8
+ export const imagePlugins = [pluginTextSlide, pluginMarkdown, pluginImage, pluginChart, pluginMermaid, pluginMovie, pluginHtmlTailwind];
@@ -14,7 +14,7 @@ const processMermaid = async (params) => {
14
14
  style: textSlideStyle,
15
15
  diagram_code: `${diagram_code}\n${beat.image.appendix?.join("\n") ?? ""}`,
16
16
  });
17
- await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height);
17
+ await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, true);
18
18
  }
19
19
  return imagePath;
20
20
  };
@@ -0,0 +1,3 @@
1
+ import { ImageProcessorParams } from "../../types/index.js";
2
+ export declare const imageType = "tailwind";
3
+ export declare const process: (params: ImageProcessorParams) => Promise<string | undefined>;
@@ -0,0 +1,18 @@
1
+ import { getHTMLFile } from "../file.js";
2
+ import { renderHTMLToImage, interpolate } from "../markdown.js";
3
+ export const imageType = "tailwind";
4
+ const processTailwind = async (params) => {
5
+ const { beat, imagePath, canvasSize } = params;
6
+ if (!beat.image || beat.image.type !== imageType)
7
+ return;
8
+ const html = Array.isArray(beat.image.html) ? beat.image.html.join("\n") : beat.image.html;
9
+ const template = getHTMLFile("tailwind");
10
+ const htmlData = interpolate(template, {
11
+ // style: textSlideStyle,
12
+ // width: Math.round(canvasSize.width * 0.625).toString(),
13
+ html_body: html,
14
+ });
15
+ await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height);
16
+ return imagePath;
17
+ };
18
+ export const process = processTailwind;
@@ -5,8 +5,15 @@ const processTextSlide = async (params) => {
5
5
  if (!beat.image || beat.image.type !== imageType)
6
6
  return;
7
7
  const slide = beat.image.slide;
8
- const markdown = `# ${slide.title}\n` + slide.bullets.map((text) => `- ${text}`).join("\n");
9
- await renderMarkdownToImage(markdown, textSlideStyle, imagePath, canvasSize.width, canvasSize.height);
8
+ const markdown = `# ${slide.title}\n` + (slide.subtitle ? `## ${slide.subtitle}\n` : "") + (slide.bullets ?? []).map((text) => `- ${text}`).join("\n");
9
+ const topMargin = (() => {
10
+ if (slide.bullets?.length && slide.bullets.length > 0) {
11
+ return "";
12
+ }
13
+ const marginTop = slide.subtitle ? canvasSize.height * 0.4 : canvasSize.height * 0.45;
14
+ return `body {margin-top: ${marginTop}px;}`;
15
+ })();
16
+ await renderMarkdownToImage(markdown, textSlideStyle + topMargin, imagePath, canvasSize.width, canvasSize.height);
10
17
  return imagePath;
11
18
  };
12
19
  export const process = processTextSlide;
@@ -1,3 +1,3 @@
1
- export declare const renderHTMLToImage: (html: string, outputPath: string, width: number, height: number) => Promise<void>;
1
+ export declare const renderHTMLToImage: (html: string, outputPath: string, width: number, height: number, isMermaid?: boolean, omitBackground?: boolean) => Promise<void>;
2
2
  export declare const renderMarkdownToImage: (markdown: string, style: string, outputPath: string, width: number, height: number) => Promise<void>;
3
3
  export declare const interpolate: (template: string, data: Record<string, string>) => string;
@@ -1,8 +1,7 @@
1
- import { GraphAILogger } from "graphai";
2
1
  import { marked } from "marked";
3
2
  import puppeteer from "puppeteer";
4
3
  const isCI = process.env.CI === "true";
5
- export const renderHTMLToImage = async (html, outputPath, width, height) => {
4
+ export const renderHTMLToImage = async (html, outputPath, width, height, isMermaid = false, omitBackground = false) => {
6
5
  // Use Puppeteer to render HTML to an image
7
6
  const browser = await puppeteer.launch({
8
7
  args: isCI ? ["--no-sandbox"] : [],
@@ -12,10 +11,15 @@ export const renderHTMLToImage = async (html, outputPath, width, height) => {
12
11
  await page.setContent(html);
13
12
  // Adjust page settings if needed (like width, height, etc.)
14
13
  await page.setViewport({ width, height });
14
+ if (isMermaid) {
15
+ await page.waitForFunction(() => {
16
+ const el = document.querySelector(".mermaid");
17
+ return el && el.dataset.ready === "true";
18
+ }, { timeout: 20000 });
19
+ }
15
20
  // Step 3: Capture screenshot of the page (which contains the Markdown-rendered HTML)
16
- await page.screenshot({ path: outputPath });
21
+ await page.screenshot({ path: outputPath, omitBackground: omitBackground });
17
22
  await browser.close();
18
- GraphAILogger.info(`HTML image rendered to ${outputPath}`);
19
23
  };
20
24
  export const renderMarkdownToImage = async (markdown, style, outputPath, width, height) => {
21
25
  const header = `<head><style>${style}</style></head>`;