mulmocast 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/assets/templates/text_and_image.json +6 -0
  2. package/assets/templates/text_only.json +6 -0
  3. package/lib/actions/audio.d.ts +3 -1
  4. package/lib/actions/audio.js +82 -44
  5. package/lib/actions/captions.js +1 -1
  6. package/lib/actions/images.d.ts +4 -0
  7. package/lib/actions/images.js +40 -21
  8. package/lib/actions/movie.js +19 -19
  9. package/lib/actions/pdf.js +2 -2
  10. package/lib/actions/translate.js +1 -1
  11. package/lib/agents/add_bgm_agent.js +3 -3
  12. package/lib/agents/combine_audio_files_agent.js +1 -1
  13. package/lib/agents/index.d.ts +2 -1
  14. package/lib/agents/index.js +2 -1
  15. package/lib/agents/tavily_agent.d.ts +15 -0
  16. package/lib/agents/tavily_agent.js +130 -0
  17. package/lib/cli/commands/audio/builder.d.ts +2 -0
  18. package/lib/cli/commands/image/builder.d.ts +2 -0
  19. package/lib/cli/commands/movie/builder.d.ts +2 -0
  20. package/lib/cli/commands/pdf/builder.d.ts +2 -0
  21. package/lib/cli/commands/translate/builder.d.ts +2 -0
  22. package/lib/cli/common.d.ts +2 -0
  23. package/lib/cli/common.js +6 -0
  24. package/lib/cli/helpers.d.ts +5 -1
  25. package/lib/cli/helpers.js +18 -2
  26. package/lib/methods/index.d.ts +1 -1
  27. package/lib/methods/index.js +1 -1
  28. package/lib/methods/mulmo_presentation_style.d.ts +14 -0
  29. package/lib/methods/mulmo_presentation_style.js +70 -0
  30. package/lib/methods/mulmo_studio_context.d.ts +14 -0
  31. package/lib/methods/mulmo_studio_context.js +20 -2
  32. package/lib/tools/deep_research.d.ts +2 -0
  33. package/lib/tools/deep_research.js +265 -0
  34. package/lib/types/schema.d.ts +31 -0
  35. package/lib/types/schema.js +1 -1
  36. package/lib/types/type.d.ts +3 -1
  37. package/lib/utils/ffmpeg_utils.d.ts +1 -0
  38. package/lib/utils/ffmpeg_utils.js +10 -0
  39. package/lib/utils/file.d.ts +1 -3
  40. package/lib/utils/file.js +4 -11
  41. package/lib/utils/preprocess.js +1 -0
  42. package/lib/utils/prompt.d.ts +3 -0
  43. package/lib/utils/prompt.js +52 -0
  44. package/package.json +4 -3
  45. package/assets/music/StarsBeyondEx.mp3 +0 -0
package/assets/templates/text_and_image.json
@@ -0,0 +1,6 @@
+ {
+ "title": "Text and Image",
+ "description": "Template for Text and Image Script.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/text_only.json
@@ -0,0 +1,6 @@
+ {
+ "title": "Text Only",
+ "description": "Template for Text Only Script.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "scriptName": "text_only_template.json"
+ }
package/lib/actions/audio.d.ts
@@ -1,5 +1,7 @@
  import "dotenv/config";
  import type { CallbackFunction } from "graphai";
- import { MulmoStudioContext } from "../types/index.js";
+ import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
+ export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
  export declare const audioFilePath: (context: MulmoStudioContext) => string;
+ export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
  export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
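The new getBeatAudioPath and generateBeatAudio exports make single-beat work possible without running the whole audio pipeline. A minimal usage sketch based only on the declarations above; the import specifiers assume the package's lib paths are reachable, and construction of the MulmoStudioContext is mulmocast-internal and elided:

    // Regenerate TTS for beat 2 only, then look up its (possibly cached) segment path.
    import { generateBeatAudio, getBeatAudioPath } from "mulmocast/lib/actions/audio.js";
    import type { MulmoStudioContext } from "mulmocast/lib/types/index.js";

    const regenerateOneBeat = async (context: MulmoStudioContext): Promise<void> => {
      await generateBeatAudio(2, context);
      const beat = context.studio.script.beats[2];
      // Returns undefined when the beat needs no audio (e.g. empty text).
      const segmentPath = getBeatAudioPath(beat.text ?? "", context, beat, context.lang);
      console.log(segmentPath);
    };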
package/lib/actions/audio.js
@@ -1,5 +1,6 @@
  import "dotenv/config";
  import { GraphAI } from "graphai";
+ import { TaskManager } from "graphai/lib/task_manager.js";
  import * as agents from "@graphai/vanilla";
  import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
  import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- import { MulmoScriptMethods } from "../methods/index.js";
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
+ import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -24,7 +25,7 @@ const provider_to_agent = {
  elevenlabs: "ttsElevenlabsAgent",
  mock: "mediaMockAgent",
  };
- const getAudioPath = (context, beat, audioFile, audioDirPath) => {
+ const getAudioPath = (context, beat, audioFile) => {
  if (beat.audio?.type === "audio") {
  const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
  if (path) {
@@ -35,34 +36,48 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
  if (beat.text === undefined || beat.text === "") {
  return undefined; // It indicates that the audio is not needed.
  }
- return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
+ return audioFile;
+ };
+ const getAudioParam = (presentationStyle, beat) => {
+ const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
+ // Use speaker-specific provider if available, otherwise fall back to script-level provider
+ const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+ const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
+ return { voiceId, provider, speechOptions };
+ };
+ export const getBeatAudioPath = (text, context, beat, lang) => {
+ const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+ const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
+ const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+ const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
+ const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
+ return getAudioPath(context, beat, audioFile);
  };
  const preprocessor = (namedInputs) => {
- const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
- const { lang } = context;
- const speaker = context.studio.script.speechParams.speakers[beat.speaker];
- const voiceId = speaker.voiceId;
- const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
+ const { beat, studioBeat, multiLingual, context } = namedInputs;
+ const { lang, presentationStyle } = context;
  const text = localizedText(beat, multiLingual, lang);
- // Use speaker-specific provider if available, otherwise fall back to script-level provider
- const provider = speaker.provider ?? context.studio.script.speechParams.provider;
- const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
- const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
- const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
+ const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+ const audioPath = getBeatAudioPath(text, context, beat, lang);
  studioBeat.audioFile = audioPath;
  const needsTTS = !beat.audio && audioPath !== undefined;
  return {
  ttsAgent: provider_to_agent[provider],
- studioBeat,
+ text,
  voiceId,
  speechOptions,
  audioPath,
- text,
+ studioBeat,
  needsTTS,
  };
  };
  const graph_tts = {
  nodes: {
+ beat: {},
+ studioBeat: {},
+ multiLingual: {},
+ context: {},
+ __mapIndex: {},
  preprocessor: {
  agent: preprocessor,
  inputs: {
@@ -70,7 +85,6 @@ const graph_tts = {
  studioBeat: ":studioBeat",
  multiLingual: ":multiLingual",
  context: ":context",
- audioDirPath: ":audioDirPath",
  },
  },
  tts: {
@@ -100,8 +114,6 @@ const graph_data = {
  audioArtifactFilePath: {},
  audioCombinedFilePath: {},
  outputStudioFilePath: {},
- audioDirPath: {},
- audioSegmentDirPath: {},
  musicFile: {},
  map: {
  agent: "mapAgent",
@@ -109,8 +121,6 @@
  rows: ":context.studio.script.beats",
  studioBeat: ":context.studio.beats",
  multiLingual: ":context.studio.multiLingual",
- audioDirPath: ":audioDirPath",
- audioSegmentDirPath: ":audioSegmentDirPath",
  context: ":context",
  },
  params: {
@@ -122,7 +132,7 @@
  combineFiles: {
  agent: "combineAudioFilesAgent",
  inputs: {
- map: ":map",
+ onComplete: ":map",
  context: ":context",
  combinedFileName: ":audioCombinedFilePath",
  },
@@ -141,7 +151,7 @@
  wait: ":combineFiles",
  voiceFile: ":audioCombinedFilePath",
  outputFile: ":audioArtifactFilePath",
- script: ":context.studio.script",
+ context: ":context",
  params: {
  musicFile: ":musicFile",
  },
@@ -172,40 +182,68 @@ export const audioFilePath = (context) => {
  const { outDirPath } = fileDirs;
  return getAudioArtifactFilePath(outDirPath, studio.filename);
  };
+ const getConcurrency = (context) => {
+ // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+ const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
+ const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
+ return provider === "nijivoice" || provider === "elevenlabs";
+ });
+ return hasLimitedConcurrencyProvider ? 1 : 8;
+ };
+ const audioAgents = {
+ ...vanillaAgents,
+ fileWriteAgent,
+ ttsOpenaiAgent,
+ ttsNijivoiceAgent,
+ ttsGoogleAgent,
+ ttsElevenlabsAgent,
+ addBGMAgent,
+ combineAudioFilesAgent,
+ };
+ export const generateBeatAudio = async (index, context, callbacks) => {
+ try {
+ MulmoStudioContextMethods.setSessionState(context, "audio", true);
+ const { studio, fileDirs } = context;
+ const { outDirPath, audioDirPath } = fileDirs;
+ const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+ mkdir(outDirPath);
+ mkdir(audioSegmentDirPath);
+ const taskManager = new TaskManager(getConcurrency(context));
+ const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+ graph.injectValue("__mapIndex", index);
+ graph.injectValue("beat", context.studio.script.beats[index]);
+ graph.injectValue("studioBeat", context.studio.beats[index]);
+ graph.injectValue("multiLingual", context.studio.multiLingual);
+ graph.injectValue("context", context);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
+ await graph.run();
+ }
+ finally {
+ MulmoStudioContextMethods.setSessionState(context, "audio", false);
+ }
+ };
  export const audio = async (context, callbacks) => {
  try {
  MulmoStudioContextMethods.setSessionState(context, "audio", true);
  const { studio, fileDirs, lang } = context;
  const { outDirPath, audioDirPath } = fileDirs;
  const audioArtifactFilePath = audioFilePath(context);
- const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
- const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
+ const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+ const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
  mkdir(outDirPath);
  mkdir(audioSegmentDirPath);
- // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
- const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
- const provider = speaker.provider ?? studio.script.speechParams.provider;
- return provider === "nijivoice" || provider === "elevenlabs";
- });
- graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
- const graph = new GraphAI(graph_data, {
- ...vanillaAgents,
- fileWriteAgent,
- ttsOpenaiAgent,
- ttsNijivoiceAgent,
- ttsGoogleAgent,
- ttsElevenlabsAgent,
- addBGMAgent,
- combineAudioFilesAgent,
- }, { agentFilters });
+ const taskManager = new TaskManager(getConcurrency(context));
+ const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
  graph.injectValue("context", context);
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
- graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
- graph.injectValue("audioDirPath", audioDirPath);
- graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
+ graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
  if (callbacks) {
  callbacks.forEach((callback) => {
  graph.registerCallback(callback);
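One behavioral consequence of the preprocessor rewrite above: the segment cache key is now built by joining its components with ":" delimiters instead of bare string concatenation, so 0.0.16 will not reuse audio segments hashed by 0.0.15. A self-contained sketch of the change, with illustrative values:

    // 0.0.15 concatenated the hash inputs directly; 0.0.16 joins them with ":".
    const [text, voiceId, instruction, speed, provider] = ["Hello.", "shimmer", "", 1.0, "openai"];
    const oldKey = `${text}${voiceId}${instruction}${speed}${provider}`;
    const newKey = [text, voiceId, instruction, speed, provider].join(":");
    // The delimiter disambiguates adjacent fields ("ab"+"c" no longer collides with "a"+"bc"),
    // at the cost of invalidating any previously cached segment files.
    console.log(oldKey);
    console.log(newKey);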
package/lib/actions/captions.js
@@ -26,7 +26,7 @@ const graph_data = {
  const { fileDirs } = namedInputs.context;
  const { caption } = context;
  const { imageDirPath } = fileDirs;
- const { canvasSize } = context.studio.script;
+ const { canvasSize } = context.presentationStyle;
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
  const template = getHTMLFile("caption");
  const text = (() => {
package/lib/actions/images.d.ts
@@ -32,8 +32,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
  };
  movieFile: string | undefined;
  imagePath: string | undefined;
+ referenceImage: string | undefined;
  } | {
+ imagePath: string;
  images: string[];
+ imageFromMovie: boolean;
  imageParams: {
  model?: string | undefined;
  style?: string | undefined;
@@ -81,6 +84,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
  };
  movieFile: string | undefined;
  imagePath: string;
+ referenceImage: string;
  prompt: string;
  }>;
  export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js
@@ -1,22 +1,24 @@
  import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger } from "graphai";
+ import { TaskManager } from "graphai/lib/task_manager.js";
  import * as agents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
- import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
+ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { imagePlugins } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
  const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
- const htmlStyle = (script, beat) => {
+ import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
+ const htmlStyle = (context, beat) => {
  return {
- canvasSize: MulmoScriptMethods.getCanvasSize(script),
- textSlideStyle: MulmoScriptMethods.getTextSlideStyle(script, beat),
+ canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
+ textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
  };
  };
  export const imagePreprocessAgent = async (namedInputs) => {
@@ -32,10 +34,10 @@ export const imagePreprocessAgent = async (namedInputs) => {
  if (plugin) {
  try {
  MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
- const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
+ const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
  const path = await plugin.process(processorParams);
  // undefined prompt indicates that image generation is not needed
- return { imagePath: path, ...returnValue };
+ return { imagePath: path, referenceImage: path, ...returnValue };
  }
  finally {
  MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
@@ -49,10 +51,10 @@ export const imagePreprocessAgent = async (namedInputs) => {
  return sources.filter((source) => source !== undefined);
  })();
  if (beat.moviePrompt && !beat.imagePrompt) {
- return { ...returnValue, images }; // no image prompt, only movie prompt
+ return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
  }
  const prompt = imagePrompt(beat, imageParams.style);
- return { imagePath, prompt, ...returnValue, images };
+ return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
  };
  const beat_graph_data = {
  version: 0.5,
@@ -93,7 +95,7 @@ const beat_graph_data = {
  params: {
  model: ":preprocessor.imageParams.model",
  moderation: ":preprocessor.imageParams.moderation",
- canvasSize: ":context.studio.script.canvasSize",
+ canvasSize: ":context.presentationStyle.canvasSize",
  },
  },
  defaultValue: {},
@@ -104,24 +106,37 @@ const beat_graph_data = {
  inputs: {
  onComplete: ":imageGenerator", // to wait for imageGenerator to finish
  prompt: ":beat.moviePrompt",
- imagePath: ":preprocessor.imagePath",
+ imagePath: ":preprocessor.referenceImage",
  file: ":preprocessor.movieFile",
  studio: ":context.studio", // for cache
  mulmoContext: ":context", // for fileCacheAgentFilter
  index: ":__mapIndex", // for cache
  sessionType: "movie", // for cache
  params: {
- model: ":context.studio.script.movieParams.model",
+ model: ":context.presentationStyle.movieParams.model",
  duration: ":beat.duration",
- canvasSize: ":context.studio.script.canvasSize",
+ canvasSize: ":context.presentationStyle.canvasSize",
  },
  },
  defaultValue: {},
  },
+ imageFromMovie: {
+ if: ":preprocessor.imageFromMovie",
+ agent: async (namedInputs) => {
+ await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
+ return { generatedImage: true };
+ },
+ inputs: {
+ onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+ imageFile: ":preprocessor.imagePath",
+ movieFile: ":preprocessor.movieFile",
+ },
+ defaultValue: { generatedImage: false },
+ },
  output: {
  agent: "copyAgent",
  inputs: {
- onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+ onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
  imageFile: ":preprocessor.imagePath",
  movieFile: ":preprocessor.movieFile",
  },
@@ -217,7 +232,6 @@ const googleAuth = async () => {
  }
  };
  const graphOption = async (context) => {
- const { studio } = context;
  const agentFilters = [
  {
  name: "fileCacheAgentFilter",
@@ -225,12 +239,14 @@ const graphOption = async (context) => {
  nodeIds: ["imageGenerator", "movieGenerator"],
  },
  ];
+ const taskManager = new TaskManager(getConcurrency(context));
  const options = {
  agentFilters,
+ taskManager,
  };
- const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
  // We need to get google's auth token only if the google is the text2image provider.
- if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
+ if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
  GraphAILogger.log("google was specified as text2image engine");
  const token = await googleAuth();
  options.config = {
@@ -250,9 +266,9 @@ const prepareGenerateImages = async (context) => {
  const { studio, fileDirs } = context;
  const { outDirPath, imageDirPath } = fileDirs;
  mkdir(`${imageDirPath}/${studio.filename}`);
- const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script, context.dryRun);
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
  const imageRefs = {};
- const images = studio.script.imageParams?.images;
+ const images = context.presentationStyle.imageParams?.images;
  if (images) {
  await Promise.all(Object.keys(images).map(async (key) => {
  const image = images[key];
@@ -302,14 +318,17 @@
  };
  return injections;
  };
- const generateImages = async (context, callbacks) => {
- const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(context.studio.script);
+ const getConcurrency = (context) => {
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
  if (imageAgentInfo.provider === "openai") {
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
  // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
  // gpt-image-1:3,000,000 TPM、150 images per minute
- graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
+ return imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
  }
+ return 4;
+ };
+ const generateImages = async (context, callbacks) => {
  const options = await graphOption(context);
  const injections = await prepareGenerateImages(context);
  const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
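The new imageFromMovie node above fills in the beat's still image for movie-only beats (imageFromMovie: true from the preprocessor) by extracting a frame from the generated clip. extractImageFromMovie itself lives in package/lib/utils/ffmpeg_utils.js (+10 lines, not shown in this diff); a hypothetical fluent-ffmpeg equivalent, for illustration only:

    import ffmpeg from "fluent-ffmpeg";

    // Hypothetical stand-in for the real helper: write the first frame of
    // movieFile out as a single image at imageFile.
    const extractImageFromMovie = (movieFile: string, imageFile: string): Promise<void> =>
      new Promise((resolve, reject) => {
        ffmpeg(movieFile)
          .frames(1) // keep a single frame
          .output(imageFile)
          .on("end", () => resolve())
          .on("error", reject)
          .run();
      });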
package/lib/actions/movie.js
@@ -1,6 +1,6 @@
  import { GraphAILogger, assert } from "graphai";
  import { mulmoTransitionSchema } from "../types/index.js";
- import { MulmoScriptMethods } from "../methods/index.js";
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -59,22 +59,22 @@ const getOutputOption = (audioId, videoId) => {
  "-b:a 128k", // Audio bitrate
  ];
  };
- const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
+ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
  const start = performance.now();
  const ffmpegContext = FfmpegContextInit();
- const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+ const missingIndex = context.studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
  if (missingIndex !== -1) {
  GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
  return false;
  }
- const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
+ const canvasInfo = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
  // Add each image input
  const filterComplexVideoIds = [];
  const filterComplexAudioIds = [];
  const transitionVideoIds = [];
  const beatTimestamps = [];
- studio.beats.reduce((timestamp, studioBeat, index) => {
- const beat = studio.script.beats[index];
+ context.studio.beats.reduce((timestamp, studioBeat, index) => {
+ const beat = context.studio.script.beats[index];
  const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
  if (!sourceFile) {
  throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
@@ -83,14 +83,14 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  throw new Error(`studioBeat.duration is not set: index=${index}`);
  }
  const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
- const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
+ const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
  const extraPadding = (() => {
  // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
  if (index === 0) {
- return studio.script.audioParams.introPadding;
+ return context.presentationStyle.audioParams.introPadding;
  }
- else if (index === studio.beats.length - 1) {
- return studio.script.audioParams.outroPadding;
+ else if (index === context.studio.beats.length - 1) {
+ return context.presentationStyle.audioParams.outroPadding;
  }
  return 0;
  })();
@@ -106,7 +106,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  else {
  filterComplexVideoIds.push(videoId);
  }
- if (studio.script.movieParams?.transition && index < studio.beats.length - 1) {
+ if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
  const sourceId = filterComplexVideoIds.pop();
  ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
  filterComplexVideoIds.push(`${sourceId}_0`);
@@ -127,16 +127,16 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  beatTimestamps.push(timestamp);
  return timestamp + duration;
  }, 0);
- assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
- assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
+ assert(filterComplexVideoIds.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
+ assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
  // console.log("*** images", images.audioIds);
  // Concatenate the trimmed images
  const concatVideoId = "concat_video";
- ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+ ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${context.studio.beats.length}:v=1:a=0[${concatVideoId}]`);
  // Add tranditions if needed
  const mixedVideoId = (() => {
- if (studio.script.movieParams?.transition && transitionVideoIds.length > 1) {
- const transition = mulmoTransitionSchema.parse(studio.script.movieParams.transition);
+ if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
+ const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
  return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
  const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
  const processedVideoId = `${transitionVideoId}_f`;
@@ -166,8 +166,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
  const end = performance.now();
  GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
- GraphAILogger.info(studio.script.title);
- GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+ GraphAILogger.info(context.studio.script.title);
+ GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
  return true;
  };
  export const movieFilePath = (context) => {
@@ -181,7 +181,7 @@ export const movie = async (context) => {
  const { outDirPath } = fileDirs;
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
  const outputVideoPath = movieFilePath(context);
- if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+ if (await createVideo(audioArtifactFilePath, outputVideoPath, context, caption)) {
  writingMessage(outputVideoPath);
  }
  }
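Note the guard change in the transition branch: "transitionVideoIds.length > 1" became "length > 0". One transition id is pushed per beat boundary, so a two-beat movie has exactly one transition and the old guard skipped the xfade chain entirely. In miniature:

    // One transition per boundary: index < beats.length - 1.
    const beats = ["beat0", "beat1"]; // two beats, one boundary
    const transitionVideoIds = beats.slice(0, -1).map((_, i) => `t${i}`);
    console.log(transitionVideoIds.length > 1); // false: 0.0.15 never applied the transition
    console.log(transitionVideoIds.length > 0); // true:  0.0.16 applies it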
package/lib/actions/pdf.js
@@ -1,7 +1,7 @@
  import fs from "fs";
  import path from "path";
  import puppeteer from "puppeteer";
- import { MulmoScriptMethods } from "../methods/index.js";
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { localizedText, isHttp } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
  import { interpolate } from "../utils/markdown.js";
@@ -97,7 +97,7 @@ const getHandoutTemplateData = (isLandscapeImage) => ({
  const generatePDFHTML = async (context, pdfMode, pdfSize) => {
  const { studio, lang = "en" } = context;
  const { multiLingual } = studio;
- const { width: imageWidth, height: imageHeight } = MulmoScriptMethods.getCanvasSize(studio.script);
+ const { width: imageWidth, height: imageHeight } = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
  const isLandscapeImage = imageWidth > imageHeight;
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
  const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
package/lib/actions/translate.js
@@ -163,7 +163,7 @@ const translateGraph = {
  },
  },
  },
- writeOutout: {
+ writeOutput: {
  // console: { before: true },
  agent: "fileWriteAgent",
  inputs: {
package/lib/agents/add_bgm_agent.js
@@ -1,11 +1,11 @@
  import { GraphAILogger } from "graphai";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
  const addBGMAgent = async ({ namedInputs, params, }) => {
- const { voiceFile, outputFile, script } = namedInputs;
+ const { voiceFile, outputFile, context } = namedInputs;
  const { musicFile } = params;
  const speechDuration = await ffmpegGetMediaDuration(voiceFile);
- const introPadding = script.audioParams.introPadding;
- const outroPadding = script.audioParams.outroPadding;
+ const introPadding = context.presentationStyle.audioParams.introPadding;
+ const outroPadding = context.presentationStyle.audioParams.outroPadding;
  const totalDuration = speechDuration + introPadding + outroPadding;
  GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
  const ffmpegContext = FfmpegContextInit();
package/lib/agents/combine_audio_files_agent.js
@@ -27,7 +27,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  if (index === context.studio.beats.length - 1) {
  return 0;
  }
- return isClosingGap ? context.studio.script.audioParams.closingPadding : context.studio.script.audioParams.padding;
+ return isClosingGap ? context.presentationStyle.audioParams.closingPadding : context.presentationStyle.audioParams.padding;
  })();
  const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
  const totalPadding = await (async () => {
package/lib/agents/index.d.ts
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
  import imageGoogleAgent from "./image_google_agent.js";
  import imageOpenaiAgent from "./image_openai_agent.js";
+ import tavilySearchAgent from "./tavily_agent.js";
  import movieGoogleAgent from "./movie_google_agent.js";
  import mediaMockAgent from "./media_mock_agent.js";
  import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
@@ -12,4 +13,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
  import { textInputAgent } from "@graphai/input_agents";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
  import imageGoogleAgent from "./image_google_agent.js";
  import imageOpenaiAgent from "./image_openai_agent.js";
+ import tavilySearchAgent from "./tavily_agent.js";
  import movieGoogleAgent from "./movie_google_agent.js";
  import mediaMockAgent from "./media_mock_agent.js";
  import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
@@ -13,4 +14,4 @@ import { textInputAgent } from "@graphai/input_agents";
  import { openAIAgent } from "@graphai/openai_agent";
  // import * as vanilla from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };