mulmocast 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +5 -1
  2. package/assets/html/pdf_handout.html +85 -0
  3. package/assets/html/pdf_slide.html +55 -0
  4. package/assets/html/pdf_talk.html +76 -0
  5. package/assets/templates/text_and_image.json +6 -0
  6. package/assets/templates/text_only.json +6 -0
  7. package/lib/actions/audio.d.ts +3 -1
  8. package/lib/actions/audio.js +84 -45
  9. package/lib/actions/captions.js +1 -1
  10. package/lib/actions/images.d.ts +89 -1
  11. package/lib/actions/images.js +160 -99
  12. package/lib/actions/movie.js +28 -21
  13. package/lib/actions/pdf.d.ts +1 -0
  14. package/lib/actions/pdf.js +134 -204
  15. package/lib/actions/translate.js +1 -1
  16. package/lib/agents/add_bgm_agent.js +3 -3
  17. package/lib/agents/combine_audio_files_agent.js +11 -9
  18. package/lib/agents/image_mock_agent.d.ts +4 -0
  19. package/lib/agents/image_mock_agent.js +18 -0
  20. package/lib/agents/index.d.ts +4 -1
  21. package/lib/agents/index.js +4 -1
  22. package/lib/agents/media_mock_agent.d.ts +4 -0
  23. package/lib/agents/media_mock_agent.js +18 -0
  24. package/lib/agents/tavily_agent.d.ts +15 -0
  25. package/lib/agents/tavily_agent.js +130 -0
  26. package/lib/agents/tts_openai_agent.js +9 -1
  27. package/lib/cli/commands/audio/builder.d.ts +4 -0
  28. package/lib/cli/commands/image/builder.d.ts +4 -0
  29. package/lib/cli/commands/movie/builder.d.ts +4 -0
  30. package/lib/cli/commands/pdf/builder.d.ts +4 -0
  31. package/lib/cli/commands/translate/builder.d.ts +4 -0
  32. package/lib/cli/common.d.ts +4 -0
  33. package/lib/cli/common.js +11 -0
  34. package/lib/cli/helpers.d.ts +5 -1
  35. package/lib/cli/helpers.js +19 -2
  36. package/lib/methods/index.d.ts +1 -1
  37. package/lib/methods/index.js +1 -1
  38. package/lib/methods/mulmo_presentation_style.d.ts +14 -0
  39. package/lib/methods/mulmo_presentation_style.js +70 -0
  40. package/lib/methods/mulmo_script.d.ts +1 -1
  41. package/lib/methods/mulmo_script.js +2 -2
  42. package/lib/methods/mulmo_studio_context.d.ts +14 -0
  43. package/lib/methods/mulmo_studio_context.js +20 -2
  44. package/lib/tools/deep_research.d.ts +2 -0
  45. package/lib/tools/deep_research.js +265 -0
  46. package/lib/types/schema.d.ts +31 -0
  47. package/lib/types/schema.js +1 -1
  48. package/lib/types/type.d.ts +4 -1
  49. package/lib/utils/ffmpeg_utils.d.ts +1 -0
  50. package/lib/utils/ffmpeg_utils.js +10 -0
  51. package/lib/utils/file.d.ts +1 -3
  52. package/lib/utils/file.js +4 -11
  53. package/lib/utils/filters.js +1 -0
  54. package/lib/utils/markdown.js +1 -1
  55. package/lib/utils/preprocess.js +1 -0
  56. package/lib/utils/prompt.d.ts +3 -0
  57. package/lib/utils/prompt.js +52 -0
  58. package/package.json +10 -10
  59. package/assets/font/NotoSansJP-Regular.ttf +0 -0
  60. package/assets/music/StarsBeyondEx.mp3 +0 -0
package/README.md CHANGED
@@ -103,9 +103,13 @@ GOOGLE_PROJECT_ID=your_google_project_id
103
103
 
104
104
  See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
105
105
 
106
- #### (Optional) For Nijivoice's TTS model
106
+ #### (Optional) For TTS models
107
107
  ```bash
108
+ # For Nijivoice TTS
108
109
  NIJIVOICE_API_KEY=your_nijivoice_api_key
110
+
111
+ # For ElevenLabs TTS
112
+ ELEVENLABS_API_KEY=your_elevenlabs_api_key
109
113
  ```
110
114
 
111
115
  #### (Optional) to access web in mulmo tool
package/assets/html/pdf_handout.html ADDED
@@ -0,0 +1,85 @@
1
+ <!DOCTYPE html>
2
+ <html lang="${lang}">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>${title}</title>
7
+ <style>
8
+ @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
9
+
10
+ * {
11
+ margin: 0;
12
+ padding: 0;
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ body {
17
+ font-family: 'Noto Sans JP', sans-serif;
18
+ font-size: 16px;
19
+ line-height: 1.6;
20
+ color: #333;
21
+ background: #fff;
22
+ }
23
+
24
+ @page {
25
+ size: ${page_size};
26
+ margin: 0;
27
+ }
28
+
29
+ .page {
30
+ page-break-after: always;
31
+ width: 100%;
32
+ height: 100vh;
33
+ position: relative;
34
+ overflow: hidden;
35
+ padding: 15px;
36
+ display: ${page_layout};
37
+ ${page_direction}
38
+ gap: 15px;
39
+ background: #fff;
40
+ }
41
+
42
+ .page:last-child {
43
+ page-break-after: avoid;
44
+ }
45
+
46
+ img {
47
+ max-width: 100%;
48
+ max-height: 100%;
49
+ object-fit: contain;
50
+ }
51
+
52
+ .handout-item {
53
+ display: flex;
54
+ flex-direction: ${flex_direction};
55
+ border: 1px solid #ddd;
56
+ overflow: hidden;
57
+ ${item_flex}
58
+ }
59
+
60
+ .handout-image {
61
+ ${image_size}
62
+ display: flex;
63
+ align-items: center;
64
+ justify-content: center;
65
+ background: #f9f9f9;
66
+ padding: 5px;
67
+ }
68
+
69
+ .handout-text {
70
+ ${text_size}
71
+ padding: 8px;
72
+ font-size: 14px;
73
+ overflow: hidden;
74
+ background: #fff;
75
+ }
76
+
77
+ .handout-text p {
78
+ margin: 0.3em 0;
79
+ }
80
+ </style>
81
+ </head>
82
+ <body>
83
+ ${pages}
84
+ </body>
85
+ </html>
package/assets/html/pdf_slide.html ADDED
@@ -0,0 +1,55 @@
1
+ <!DOCTYPE html>
2
+ <html lang="${lang}">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>${title}</title>
7
+ <style>
8
+ @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
9
+
10
+ * {
11
+ margin: 0;
12
+ padding: 0;
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ body {
17
+ font-family: 'Noto Sans JP', sans-serif;
18
+ font-size: 14px;
19
+ line-height: 1.6;
20
+ color: #333;
21
+ background: #fff;
22
+ }
23
+
24
+ @page {
25
+ size: ${page_size};
26
+ margin: 0;
27
+ }
28
+
29
+ .page {
30
+ page-break-after: always;
31
+ width: 100%;
32
+ height: 100vh;
33
+ position: relative;
34
+ overflow: hidden;
35
+ display: flex;
36
+ align-items: center;
37
+ justify-content: center;
38
+ background: #fff;
39
+ }
40
+
41
+ .page:last-child {
42
+ page-break-after: avoid;
43
+ }
44
+
45
+ img {
46
+ max-width: 100%;
47
+ max-height: 100%;
48
+ object-fit: contain;
49
+ }
50
+ </style>
51
+ </head>
52
+ <body>
53
+ ${pages}
54
+ </body>
55
+ </html>
package/assets/html/pdf_talk.html ADDED
@@ -0,0 +1,76 @@
1
+ <!DOCTYPE html>
2
+ <html lang="${lang}">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>${title}</title>
7
+ <style>
8
+ @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
9
+
10
+ * {
11
+ margin: 0;
12
+ padding: 0;
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ body {
17
+ font-family: 'Noto Sans JP', sans-serif;
18
+ font-size: 17px;
19
+ line-height: 1.4;
20
+ color: #333;
21
+ background: #fff;
22
+ }
23
+
24
+ @page {
25
+ size: ${page_size};
26
+ margin: 0;
27
+ }
28
+
29
+ .page {
30
+ page-break-after: always;
31
+ width: 100%;
32
+ height: 100vh;
33
+ position: relative;
34
+ overflow: hidden;
35
+ padding: 20px;
36
+ display: flex;
37
+ flex-direction: column;
38
+ background: #fff;
39
+ }
40
+
41
+ .page:last-child {
42
+ page-break-after: avoid;
43
+ }
44
+
45
+ img {
46
+ max-width: 100%;
47
+ max-height: 100%;
48
+ object-fit: contain;
49
+ }
50
+
51
+ .image-container {
52
+ flex: 1;
53
+ display: flex;
54
+ align-items: center;
55
+ justify-content: center;
56
+ margin-bottom: 20px;
57
+ border: 1px solid #ddd;
58
+ background: #f9f9f9;
59
+ }
60
+
61
+ .text-container {
62
+ padding: 10px;
63
+ background: #fff;
64
+ border-top: 2px solid #333;
65
+ min-height: 120px;
66
+ }
67
+
68
+ .text-container p {
69
+ margin: 0.5em 0;
70
+ }
71
+ </style>
72
+ </head>
73
+ <body>
74
+ ${pages}
75
+ </body>
76
+ </html>
package/assets/templates/text_and_image.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "title": "Text and Image",
3
+ "description": "Template for Text and Image Script.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "scriptName": "image_prompts_template.json"
6
+ }
package/assets/templates/text_only.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "title": "Text Only",
3
+ "description": "Template for Text Only Script.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "scriptName": "text_only_template.json"
6
+ }
package/lib/actions/audio.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import "dotenv/config";
2
2
  import type { CallbackFunction } from "graphai";
3
- import { MulmoStudioContext } from "../types/index.js";
3
+ import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
4
+ export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
4
5
  export declare const audioFilePath: (context: MulmoStudioContext) => string;
6
+ export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
5
7
  export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/audio.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import "dotenv/config";
2
2
  import { GraphAI } from "graphai";
3
+ import { TaskManager } from "graphai/lib/task_manager.js";
3
4
  import * as agents from "@graphai/vanilla";
4
5
  import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
5
6
  import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
8
9
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
9
10
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
11
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
- import { MulmoScriptMethods } from "../methods/index.js";
12
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
13
  import { fileCacheAgentFilter } from "../utils/filters.js";
13
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
14
+ import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
14
15
  import { text2hash, localizedText } from "../utils/utils.js";
15
16
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
16
17
  import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -22,8 +23,9 @@ const provider_to_agent = {
22
23
  openai: "ttsOpenaiAgent",
23
24
  google: "ttsGoogleAgent",
24
25
  elevenlabs: "ttsElevenlabsAgent",
26
+ mock: "mediaMockAgent",
25
27
  };
26
- const getAudioPath = (context, beat, audioFile, audioDirPath) => {
28
+ const getAudioPath = (context, beat, audioFile) => {
27
29
  if (beat.audio?.type === "audio") {
28
30
  const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
29
31
  if (path) {
@@ -31,37 +33,51 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
31
33
  }
32
34
  throw new Error("Invalid audio source");
33
35
  }
34
- if (beat.text === "") {
36
+ if (beat.text === undefined || beat.text === "") {
35
37
  return undefined; // It indicates that the audio is not needed.
36
38
  }
37
- return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
39
+ return audioFile;
40
+ };
41
+ const getAudioParam = (presentationStyle, beat) => {
42
+ const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
43
+ // Use speaker-specific provider if available, otherwise fall back to script-level provider
44
+ const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
45
+ const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
46
+ return { voiceId, provider, speechOptions };
47
+ };
48
+ export const getBeatAudioPath = (text, context, beat, lang) => {
49
+ const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
50
+ const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
51
+ const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
52
+ const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
53
+ const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
54
+ return getAudioPath(context, beat, audioFile);
38
55
  };
39
56
  const preprocessor = (namedInputs) => {
40
- const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
41
- const { lang } = context;
42
- const speaker = context.studio.script.speechParams.speakers[beat.speaker];
43
- const voiceId = speaker.voiceId;
44
- const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
57
+ const { beat, studioBeat, multiLingual, context } = namedInputs;
58
+ const { lang, presentationStyle } = context;
45
59
  const text = localizedText(beat, multiLingual, lang);
46
- // Use speaker-specific provider if available, otherwise fall back to script-level provider
47
- const provider = speaker.provider ?? context.studio.script.speechParams.provider;
48
- const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
49
- const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
50
- const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
60
+ const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
61
+ const audioPath = getBeatAudioPath(text, context, beat, lang);
51
62
  studioBeat.audioFile = audioPath;
52
63
  const needsTTS = !beat.audio && audioPath !== undefined;
53
64
  return {
54
65
  ttsAgent: provider_to_agent[provider],
55
- studioBeat,
66
+ text,
56
67
  voiceId,
57
68
  speechOptions,
58
69
  audioPath,
59
- text,
70
+ studioBeat,
60
71
  needsTTS,
61
72
  };
62
73
  };
63
74
  const graph_tts = {
64
75
  nodes: {
76
+ beat: {},
77
+ studioBeat: {},
78
+ multiLingual: {},
79
+ context: {},
80
+ __mapIndex: {},
65
81
  preprocessor: {
66
82
  agent: preprocessor,
67
83
  inputs: {
@@ -69,7 +85,6 @@ const graph_tts = {
69
85
  studioBeat: ":studioBeat",
70
86
  multiLingual: ":multiLingual",
71
87
  context: ":context",
72
- audioDirPath: ":audioDirPath",
73
88
  },
74
89
  },
75
90
  tts: {
@@ -99,8 +114,6 @@ const graph_data = {
99
114
  audioArtifactFilePath: {},
100
115
  audioCombinedFilePath: {},
101
116
  outputStudioFilePath: {},
102
- audioDirPath: {},
103
- audioSegmentDirPath: {},
104
117
  musicFile: {},
105
118
  map: {
106
119
  agent: "mapAgent",
@@ -108,8 +121,6 @@ const graph_data = {
108
121
  rows: ":context.studio.script.beats",
109
122
  studioBeat: ":context.studio.beats",
110
123
  multiLingual: ":context.studio.multiLingual",
111
- audioDirPath: ":audioDirPath",
112
- audioSegmentDirPath: ":audioSegmentDirPath",
113
124
  context: ":context",
114
125
  },
115
126
  params: {
@@ -121,7 +132,7 @@ const graph_data = {
121
132
  combineFiles: {
122
133
  agent: "combineAudioFilesAgent",
123
134
  inputs: {
124
- map: ":map",
135
+ onComplete: ":map",
125
136
  context: ":context",
126
137
  combinedFileName: ":audioCombinedFilePath",
127
138
  },
@@ -140,7 +151,7 @@ const graph_data = {
140
151
  wait: ":combineFiles",
141
152
  voiceFile: ":audioCombinedFilePath",
142
153
  outputFile: ":audioArtifactFilePath",
143
- script: ":context.studio.script",
154
+ context: ":context",
144
155
  params: {
145
156
  musicFile: ":musicFile",
146
157
  },
@@ -171,40 +182,68 @@ export const audioFilePath = (context) => {
171
182
  const { outDirPath } = fileDirs;
172
183
  return getAudioArtifactFilePath(outDirPath, studio.filename);
173
184
  };
185
+ const getConcurrency = (context) => {
186
+ // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
187
+ const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
188
+ const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
189
+ return provider === "nijivoice" || provider === "elevenlabs";
190
+ });
191
+ return hasLimitedConcurrencyProvider ? 1 : 8;
192
+ };
193
+ const audioAgents = {
194
+ ...vanillaAgents,
195
+ fileWriteAgent,
196
+ ttsOpenaiAgent,
197
+ ttsNijivoiceAgent,
198
+ ttsGoogleAgent,
199
+ ttsElevenlabsAgent,
200
+ addBGMAgent,
201
+ combineAudioFilesAgent,
202
+ };
203
+ export const generateBeatAudio = async (index, context, callbacks) => {
204
+ try {
205
+ MulmoStudioContextMethods.setSessionState(context, "audio", true);
206
+ const { studio, fileDirs } = context;
207
+ const { outDirPath, audioDirPath } = fileDirs;
208
+ const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
209
+ mkdir(outDirPath);
210
+ mkdir(audioSegmentDirPath);
211
+ const taskManager = new TaskManager(getConcurrency(context));
212
+ const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
213
+ graph.injectValue("__mapIndex", index);
214
+ graph.injectValue("beat", context.studio.script.beats[index]);
215
+ graph.injectValue("studioBeat", context.studio.beats[index]);
216
+ graph.injectValue("multiLingual", context.studio.multiLingual);
217
+ graph.injectValue("context", context);
218
+ if (callbacks) {
219
+ callbacks.forEach((callback) => {
220
+ graph.registerCallback(callback);
221
+ });
222
+ }
223
+ await graph.run();
224
+ }
225
+ finally {
226
+ MulmoStudioContextMethods.setSessionState(context, "audio", false);
227
+ }
228
+ };
174
229
  export const audio = async (context, callbacks) => {
175
230
  try {
176
231
  MulmoStudioContextMethods.setSessionState(context, "audio", true);
177
232
  const { studio, fileDirs, lang } = context;
178
233
  const { outDirPath, audioDirPath } = fileDirs;
179
234
  const audioArtifactFilePath = audioFilePath(context);
180
- const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
181
- const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
235
+ const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
236
+ const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
182
237
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
183
238
  mkdir(outDirPath);
184
239
  mkdir(audioSegmentDirPath);
185
- // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
186
- const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
187
- const provider = speaker.provider ?? studio.script.speechParams.provider;
188
- return provider === "nijivoice" || provider === "elevenlabs";
189
- });
190
- graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
191
- const graph = new GraphAI(graph_data, {
192
- ...vanillaAgents,
193
- fileWriteAgent,
194
- ttsOpenaiAgent,
195
- ttsNijivoiceAgent,
196
- ttsGoogleAgent,
197
- ttsElevenlabsAgent,
198
- addBGMAgent,
199
- combineAudioFilesAgent,
200
- }, { agentFilters });
240
+ const taskManager = new TaskManager(getConcurrency(context));
241
+ const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
201
242
  graph.injectValue("context", context);
202
243
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
203
244
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
204
245
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
205
- graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
206
- graph.injectValue("audioDirPath", audioDirPath);
207
- graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
246
+ graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
208
247
  if (callbacks) {
209
248
  callbacks.forEach((callback) => {
210
249
  graph.registerCallback(callback);
package/lib/actions/captions.js CHANGED
@@ -26,7 +26,7 @@ const graph_data = {
26
26
  const { fileDirs } = namedInputs.context;
27
27
  const { caption } = context;
28
28
  const { imageDirPath } = fileDirs;
29
- const { canvasSize } = context.studio.script;
29
+ const { canvasSize } = context.presentationStyle;
30
30
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
31
31
  const template = getHTMLFile("caption");
32
32
  const text = (() => {
package/lib/actions/images.d.ts CHANGED
@@ -1,3 +1,91 @@
1
1
  import type { CallbackFunction } from "graphai";
2
- import { MulmoStudioContext } from "../types/index.js";
2
+ import { MulmoStudioContext, MulmoBeat, Text2ImageAgentInfo } from "../types/index.js";
3
+ export declare const imagePreprocessAgent: (namedInputs: {
4
+ context: MulmoStudioContext;
5
+ beat: MulmoBeat;
6
+ index: number;
7
+ suffix: string;
8
+ imageDirPath: string;
9
+ imageAgentInfo: Text2ImageAgentInfo;
10
+ imageRefs: Record<string, string>;
11
+ }) => Promise<{
12
+ imageParams: {
13
+ model?: string | undefined;
14
+ style?: string | undefined;
15
+ moderation?: string | undefined;
16
+ images?: Record<string, {
17
+ type: "image";
18
+ source: {
19
+ url: string;
20
+ kind: "url";
21
+ } | {
22
+ kind: "base64";
23
+ data: string;
24
+ } | {
25
+ text: string;
26
+ kind: "text";
27
+ } | {
28
+ path: string;
29
+ kind: "path";
30
+ };
31
+ }> | undefined;
32
+ };
33
+ movieFile: string | undefined;
34
+ imagePath: string | undefined;
35
+ referenceImage: string | undefined;
36
+ } | {
37
+ imagePath: string;
38
+ images: string[];
39
+ imageFromMovie: boolean;
40
+ imageParams: {
41
+ model?: string | undefined;
42
+ style?: string | undefined;
43
+ moderation?: string | undefined;
44
+ images?: Record<string, {
45
+ type: "image";
46
+ source: {
47
+ url: string;
48
+ kind: "url";
49
+ } | {
50
+ kind: "base64";
51
+ data: string;
52
+ } | {
53
+ text: string;
54
+ kind: "text";
55
+ } | {
56
+ path: string;
57
+ kind: "path";
58
+ };
59
+ }> | undefined;
60
+ };
61
+ movieFile: string | undefined;
62
+ } | {
63
+ images: string[];
64
+ imageParams: {
65
+ model?: string | undefined;
66
+ style?: string | undefined;
67
+ moderation?: string | undefined;
68
+ images?: Record<string, {
69
+ type: "image";
70
+ source: {
71
+ url: string;
72
+ kind: "url";
73
+ } | {
74
+ kind: "base64";
75
+ data: string;
76
+ } | {
77
+ text: string;
78
+ kind: "text";
79
+ } | {
80
+ path: string;
81
+ kind: "path";
82
+ };
83
+ }> | undefined;
84
+ };
85
+ movieFile: string | undefined;
86
+ imagePath: string;
87
+ referenceImage: string;
88
+ prompt: string;
89
+ }>;
3
90
  export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
91
+ export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;