mulmocast 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/text_and_image.json +6 -0
- package/assets/templates/text_only.json +6 -0
- package/lib/actions/audio.d.ts +4 -2
- package/lib/actions/audio.js +89 -48
- package/lib/actions/captions.d.ts +1 -1
- package/lib/actions/captions.js +17 -14
- package/lib/actions/images.d.ts +6 -3
- package/lib/actions/images.js +64 -39
- package/lib/actions/movie.js +19 -19
- package/lib/actions/pdf.js +3 -4
- package/lib/actions/translate.js +11 -11
- package/lib/agents/add_bgm_agent.js +3 -3
- package/lib/agents/combine_audio_files_agent.js +88 -42
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/tavily_agent.d.ts +15 -0
- package/lib/agents/tavily_agent.js +130 -0
- package/lib/cli/commands/audio/builder.d.ts +2 -0
- package/lib/cli/commands/image/builder.d.ts +2 -0
- package/lib/cli/commands/movie/builder.d.ts +2 -0
- package/lib/cli/commands/movie/handler.js +1 -6
- package/lib/cli/commands/pdf/builder.d.ts +2 -0
- package/lib/cli/commands/translate/builder.d.ts +2 -0
- package/lib/cli/common.d.ts +2 -0
- package/lib/cli/common.js +6 -0
- package/lib/cli/helpers.d.ts +7 -1
- package/lib/cli/helpers.js +30 -3
- package/lib/methods/index.d.ts +1 -1
- package/lib/methods/index.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +14 -0
- package/lib/methods/mulmo_presentation_style.js +70 -0
- package/lib/methods/mulmo_studio_context.d.ts +17 -0
- package/lib/methods/mulmo_studio_context.js +30 -2
- package/lib/tools/deep_research.d.ts +2 -0
- package/lib/tools/deep_research.js +265 -0
- package/lib/types/index.d.ts +0 -1
- package/lib/types/index.js +0 -1
- package/lib/types/schema.d.ts +101 -55
- package/lib/types/schema.js +3 -3
- package/lib/types/type.d.ts +5 -1
- package/lib/utils/ffmpeg_utils.d.ts +1 -0
- package/lib/utils/ffmpeg_utils.js +10 -0
- package/lib/utils/file.d.ts +7 -4
- package/lib/utils/file.js +24 -12
- package/lib/utils/preprocess.d.ts +0 -9
- package/lib/utils/preprocess.js +4 -10
- package/lib/utils/prompt.d.ts +3 -0
- package/lib/utils/prompt.js +52 -0
- package/package.json +11 -10
- package/assets/music/StarsBeyondEx.mp3 +0 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Text and Image",
|
|
3
|
+
"description": "Template for Text and Image Script.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"scriptName": "image_prompts_template.json"
|
|
6
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Text Only",
|
|
3
|
+
"description": "Template for Text Only Script.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"scriptName": "text_only_template.json"
|
|
6
|
+
}
|
package/lib/actions/audio.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import type { CallbackFunction } from "graphai";
|
|
3
|
-
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
|
|
4
|
+
export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
|
|
4
5
|
export declare const audioFilePath: (context: MulmoStudioContext) => string;
|
|
5
|
-
export declare const
|
|
6
|
+
export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
7
|
+
export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
package/lib/actions/audio.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI } from "graphai";
|
|
3
|
+
import { TaskManager } from "graphai/lib/task_manager.js";
|
|
3
4
|
import * as agents from "@graphai/vanilla";
|
|
4
5
|
import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
5
6
|
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
|
|
|
8
9
|
import ttsGoogleAgent from "../agents/tts_google_agent.js";
|
|
9
10
|
import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
|
|
10
11
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
|
-
import {
|
|
12
|
+
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
12
13
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
-
import { getAudioArtifactFilePath,
|
|
14
|
+
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
14
15
|
import { text2hash, localizedText } from "../utils/utils.js";
|
|
15
16
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
16
17
|
import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
|
|
@@ -24,7 +25,7 @@ const provider_to_agent = {
|
|
|
24
25
|
elevenlabs: "ttsElevenlabsAgent",
|
|
25
26
|
mock: "mediaMockAgent",
|
|
26
27
|
};
|
|
27
|
-
const getAudioPath = (context, beat, audioFile
|
|
28
|
+
const getAudioPath = (context, beat, audioFile) => {
|
|
28
29
|
if (beat.audio?.type === "audio") {
|
|
29
30
|
const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
|
|
30
31
|
if (path) {
|
|
@@ -35,34 +36,48 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
|
|
|
35
36
|
if (beat.text === undefined || beat.text === "") {
|
|
36
37
|
return undefined; // It indicates that the audio is not needed.
|
|
37
38
|
}
|
|
38
|
-
return
|
|
39
|
+
return audioFile;
|
|
40
|
+
};
|
|
41
|
+
const getAudioParam = (presentationStyle, beat) => {
|
|
42
|
+
const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
|
|
43
|
+
// Use speaker-specific provider if available, otherwise fall back to script-level provider
|
|
44
|
+
const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
|
|
45
|
+
const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
|
|
46
|
+
return { voiceId, provider, speechOptions };
|
|
47
|
+
};
|
|
48
|
+
export const getBeatAudioPath = (text, context, beat, lang) => {
|
|
49
|
+
const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
|
|
50
|
+
const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
|
|
51
|
+
const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
|
|
52
|
+
const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
|
|
53
|
+
const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
|
|
54
|
+
return getAudioPath(context, beat, audioFile);
|
|
39
55
|
};
|
|
40
56
|
const preprocessor = (namedInputs) => {
|
|
41
|
-
const { beat, studioBeat, multiLingual, context
|
|
42
|
-
const { lang } = context;
|
|
43
|
-
const speaker = context.studio.script.speechParams.speakers[beat.speaker];
|
|
44
|
-
const voiceId = speaker.voiceId;
|
|
45
|
-
const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
|
|
57
|
+
const { beat, studioBeat, multiLingual, context } = namedInputs;
|
|
58
|
+
const { lang, presentationStyle } = context;
|
|
46
59
|
const text = localizedText(beat, multiLingual, lang);
|
|
47
|
-
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
|
|
51
|
-
const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
|
|
52
|
-
studioBeat.audioFile = audioPath;
|
|
60
|
+
const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
|
|
61
|
+
const audioPath = getBeatAudioPath(text, context, beat, lang);
|
|
62
|
+
studioBeat.audioFile = audioPath; // TODO
|
|
53
63
|
const needsTTS = !beat.audio && audioPath !== undefined;
|
|
54
64
|
return {
|
|
55
65
|
ttsAgent: provider_to_agent[provider],
|
|
56
|
-
|
|
66
|
+
text,
|
|
57
67
|
voiceId,
|
|
58
68
|
speechOptions,
|
|
59
69
|
audioPath,
|
|
60
|
-
|
|
70
|
+
studioBeat,
|
|
61
71
|
needsTTS,
|
|
62
72
|
};
|
|
63
73
|
};
|
|
64
74
|
const graph_tts = {
|
|
65
75
|
nodes: {
|
|
76
|
+
beat: {},
|
|
77
|
+
studioBeat: {},
|
|
78
|
+
multiLingual: {},
|
|
79
|
+
context: {},
|
|
80
|
+
__mapIndex: {},
|
|
66
81
|
preprocessor: {
|
|
67
82
|
agent: preprocessor,
|
|
68
83
|
inputs: {
|
|
@@ -70,7 +85,6 @@ const graph_tts = {
|
|
|
70
85
|
studioBeat: ":studioBeat",
|
|
71
86
|
multiLingual: ":multiLingual",
|
|
72
87
|
context: ":context",
|
|
73
|
-
audioDirPath: ":audioDirPath",
|
|
74
88
|
},
|
|
75
89
|
},
|
|
76
90
|
tts: {
|
|
@@ -100,17 +114,13 @@ const graph_data = {
|
|
|
100
114
|
audioArtifactFilePath: {},
|
|
101
115
|
audioCombinedFilePath: {},
|
|
102
116
|
outputStudioFilePath: {},
|
|
103
|
-
audioDirPath: {},
|
|
104
|
-
audioSegmentDirPath: {},
|
|
105
117
|
musicFile: {},
|
|
106
118
|
map: {
|
|
107
119
|
agent: "mapAgent",
|
|
108
120
|
inputs: {
|
|
109
121
|
rows: ":context.studio.script.beats",
|
|
110
122
|
studioBeat: ":context.studio.beats",
|
|
111
|
-
multiLingual: ":context.
|
|
112
|
-
audioDirPath: ":audioDirPath",
|
|
113
|
-
audioSegmentDirPath: ":audioSegmentDirPath",
|
|
123
|
+
multiLingual: ":context.multiLingual",
|
|
114
124
|
context: ":context",
|
|
115
125
|
},
|
|
116
126
|
params: {
|
|
@@ -122,7 +132,7 @@ const graph_data = {
|
|
|
122
132
|
combineFiles: {
|
|
123
133
|
agent: "combineAudioFilesAgent",
|
|
124
134
|
inputs: {
|
|
125
|
-
|
|
135
|
+
onComplete: ":map",
|
|
126
136
|
context: ":context",
|
|
127
137
|
combinedFileName: ":audioCombinedFilePath",
|
|
128
138
|
},
|
|
@@ -141,7 +151,7 @@ const graph_data = {
|
|
|
141
151
|
wait: ":combineFiles",
|
|
142
152
|
voiceFile: ":audioCombinedFilePath",
|
|
143
153
|
outputFile: ":audioArtifactFilePath",
|
|
144
|
-
|
|
154
|
+
context: ":context",
|
|
145
155
|
params: {
|
|
146
156
|
musicFile: ":musicFile",
|
|
147
157
|
},
|
|
@@ -172,49 +182,80 @@ export const audioFilePath = (context) => {
|
|
|
172
182
|
const { outDirPath } = fileDirs;
|
|
173
183
|
return getAudioArtifactFilePath(outDirPath, studio.filename);
|
|
174
184
|
};
|
|
185
|
+
const getConcurrency = (context) => {
|
|
186
|
+
// Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
|
|
187
|
+
const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
|
|
188
|
+
const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
|
|
189
|
+
return provider === "nijivoice" || provider === "elevenlabs";
|
|
190
|
+
});
|
|
191
|
+
return hasLimitedConcurrencyProvider ? 1 : 8;
|
|
192
|
+
};
|
|
193
|
+
const audioAgents = {
|
|
194
|
+
...vanillaAgents,
|
|
195
|
+
fileWriteAgent,
|
|
196
|
+
ttsOpenaiAgent,
|
|
197
|
+
ttsNijivoiceAgent,
|
|
198
|
+
ttsGoogleAgent,
|
|
199
|
+
ttsElevenlabsAgent,
|
|
200
|
+
addBGMAgent,
|
|
201
|
+
combineAudioFilesAgent,
|
|
202
|
+
};
|
|
203
|
+
export const generateBeatAudio = async (index, context, callbacks) => {
|
|
204
|
+
try {
|
|
205
|
+
MulmoStudioContextMethods.setSessionState(context, "audio", true);
|
|
206
|
+
const { studio, fileDirs } = context;
|
|
207
|
+
const { outDirPath, audioDirPath } = fileDirs;
|
|
208
|
+
const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
|
|
209
|
+
mkdir(outDirPath);
|
|
210
|
+
mkdir(audioSegmentDirPath);
|
|
211
|
+
const taskManager = new TaskManager(getConcurrency(context));
|
|
212
|
+
const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
|
|
213
|
+
graph.injectValue("__mapIndex", index);
|
|
214
|
+
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
215
|
+
graph.injectValue("studioBeat", context.studio.beats[index]);
|
|
216
|
+
graph.injectValue("multiLingual", context.multiLingual);
|
|
217
|
+
graph.injectValue("context", context);
|
|
218
|
+
if (callbacks) {
|
|
219
|
+
callbacks.forEach((callback) => {
|
|
220
|
+
graph.registerCallback(callback);
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
await graph.run();
|
|
224
|
+
}
|
|
225
|
+
finally {
|
|
226
|
+
MulmoStudioContextMethods.setSessionState(context, "audio", false);
|
|
227
|
+
}
|
|
228
|
+
};
|
|
175
229
|
export const audio = async (context, callbacks) => {
|
|
176
230
|
try {
|
|
177
231
|
MulmoStudioContextMethods.setSessionState(context, "audio", true);
|
|
178
232
|
const { studio, fileDirs, lang } = context;
|
|
179
233
|
const { outDirPath, audioDirPath } = fileDirs;
|
|
180
234
|
const audioArtifactFilePath = audioFilePath(context);
|
|
181
|
-
const audioSegmentDirPath =
|
|
182
|
-
const audioCombinedFilePath =
|
|
235
|
+
const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
|
|
236
|
+
const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
|
|
183
237
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
|
|
184
238
|
mkdir(outDirPath);
|
|
185
239
|
mkdir(audioSegmentDirPath);
|
|
186
|
-
|
|
187
|
-
const
|
|
188
|
-
const provider = speaker.provider ?? studio.script.speechParams.provider;
|
|
189
|
-
return provider === "nijivoice" || provider === "elevenlabs";
|
|
190
|
-
});
|
|
191
|
-
graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
|
|
192
|
-
const graph = new GraphAI(graph_data, {
|
|
193
|
-
...vanillaAgents,
|
|
194
|
-
fileWriteAgent,
|
|
195
|
-
ttsOpenaiAgent,
|
|
196
|
-
ttsNijivoiceAgent,
|
|
197
|
-
ttsGoogleAgent,
|
|
198
|
-
ttsElevenlabsAgent,
|
|
199
|
-
addBGMAgent,
|
|
200
|
-
combineAudioFilesAgent,
|
|
201
|
-
}, { agentFilters });
|
|
240
|
+
const taskManager = new TaskManager(getConcurrency(context));
|
|
241
|
+
const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
|
|
202
242
|
graph.injectValue("context", context);
|
|
203
243
|
graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
|
|
204
244
|
graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
|
|
205
245
|
graph.injectValue("outputStudioFilePath", outputStudioFilePath);
|
|
206
|
-
graph.injectValue("
|
|
207
|
-
graph.injectValue("audioDirPath", audioDirPath);
|
|
208
|
-
graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
|
|
246
|
+
graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
|
|
209
247
|
if (callbacks) {
|
|
210
248
|
callbacks.forEach((callback) => {
|
|
211
249
|
graph.registerCallback(callback);
|
|
212
250
|
});
|
|
213
251
|
}
|
|
214
|
-
await graph.run();
|
|
252
|
+
const result = await graph.run();
|
|
215
253
|
writingMessage(audioCombinedFilePath);
|
|
254
|
+
MulmoStudioContextMethods.setSessionState(context, "audio", false);
|
|
255
|
+
return result.combineFiles;
|
|
216
256
|
}
|
|
217
|
-
|
|
257
|
+
catch (__error) {
|
|
218
258
|
MulmoStudioContextMethods.setSessionState(context, "audio", false);
|
|
259
|
+
throw __error;
|
|
219
260
|
}
|
|
220
261
|
};
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import { MulmoStudioContext } from "../types/index.js";
|
|
2
2
|
import type { CallbackFunction } from "graphai";
|
|
3
|
-
export declare const captions: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<
|
|
3
|
+
export declare const captions: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
package/lib/actions/captions.js
CHANGED
|
@@ -26,11 +26,11 @@ const graph_data = {
|
|
|
26
26
|
const { fileDirs } = namedInputs.context;
|
|
27
27
|
const { caption } = context;
|
|
28
28
|
const { imageDirPath } = fileDirs;
|
|
29
|
-
const { canvasSize } = context.
|
|
29
|
+
const { canvasSize } = context.presentationStyle;
|
|
30
30
|
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
|
|
31
31
|
const template = getHTMLFile("caption");
|
|
32
32
|
const text = (() => {
|
|
33
|
-
const multiLingual = context.
|
|
33
|
+
const multiLingual = context.multiLingual;
|
|
34
34
|
if (caption && multiLingual) {
|
|
35
35
|
return multiLingual[index].multiLingualTexts[caption].text;
|
|
36
36
|
}
|
|
@@ -63,18 +63,21 @@ const graph_data = {
|
|
|
63
63
|
},
|
|
64
64
|
};
|
|
65
65
|
export const captions = async (context, callbacks) => {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
callbacks
|
|
72
|
-
|
|
73
|
-
|
|
66
|
+
if (context.caption) {
|
|
67
|
+
try {
|
|
68
|
+
MulmoStudioContextMethods.setSessionState(context, "caption", true);
|
|
69
|
+
const graph = new GraphAI(graph_data, { ...vanillaAgents });
|
|
70
|
+
graph.injectValue("context", context);
|
|
71
|
+
if (callbacks) {
|
|
72
|
+
callbacks.forEach((callback) => {
|
|
73
|
+
graph.registerCallback(callback);
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
await graph.run();
|
|
77
|
+
}
|
|
78
|
+
finally {
|
|
79
|
+
MulmoStudioContextMethods.setSessionState(context, "caption", false);
|
|
74
80
|
}
|
|
75
|
-
await graph.run();
|
|
76
|
-
}
|
|
77
|
-
finally {
|
|
78
|
-
MulmoStudioContextMethods.setSessionState(context, "caption", false);
|
|
79
81
|
}
|
|
82
|
+
return context;
|
|
80
83
|
};
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -4,8 +4,6 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
4
4
|
context: MulmoStudioContext;
|
|
5
5
|
beat: MulmoBeat;
|
|
6
6
|
index: number;
|
|
7
|
-
suffix: string;
|
|
8
|
-
imageDirPath: string;
|
|
9
7
|
imageAgentInfo: Text2ImageAgentInfo;
|
|
10
8
|
imageRefs: Record<string, string>;
|
|
11
9
|
}) => Promise<{
|
|
@@ -32,8 +30,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
32
30
|
};
|
|
33
31
|
movieFile: string | undefined;
|
|
34
32
|
imagePath: string | undefined;
|
|
33
|
+
referenceImage: string | undefined;
|
|
35
34
|
} | {
|
|
35
|
+
imagePath: string;
|
|
36
36
|
images: string[];
|
|
37
|
+
imageFromMovie: boolean;
|
|
37
38
|
imageParams: {
|
|
38
39
|
model?: string | undefined;
|
|
39
40
|
style?: string | undefined;
|
|
@@ -81,7 +82,9 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
81
82
|
};
|
|
82
83
|
movieFile: string | undefined;
|
|
83
84
|
imagePath: string;
|
|
85
|
+
referenceImage: string;
|
|
84
86
|
prompt: string;
|
|
85
87
|
}>;
|
|
86
|
-
export declare const
|
|
88
|
+
export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
|
|
89
|
+
export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
|
87
90
|
export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
package/lib/actions/images.js
CHANGED
|
@@ -1,41 +1,43 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
4
|
+
import { TaskManager } from "graphai/lib/task_manager.js";
|
|
4
5
|
import * as agents from "@graphai/vanilla";
|
|
5
6
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
6
|
-
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
7
|
+
import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
|
|
7
8
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
8
9
|
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
|
|
9
|
-
import {
|
|
10
|
+
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
10
11
|
import { imagePlugins } from "../utils/image_plugins/index.js";
|
|
11
12
|
import { imagePrompt } from "../utils/prompt.js";
|
|
12
13
|
const vanillaAgents = agents.default ?? agents;
|
|
13
14
|
dotenv.config();
|
|
14
15
|
// const openai = new OpenAI();
|
|
15
16
|
import { GoogleAuth } from "google-auth-library";
|
|
16
|
-
|
|
17
|
+
import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
|
|
18
|
+
const htmlStyle = (context, beat) => {
|
|
17
19
|
return {
|
|
18
|
-
canvasSize:
|
|
19
|
-
textSlideStyle:
|
|
20
|
+
canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
|
|
21
|
+
textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
|
|
20
22
|
};
|
|
21
23
|
};
|
|
22
24
|
export const imagePreprocessAgent = async (namedInputs) => {
|
|
23
|
-
const { context, beat, index,
|
|
25
|
+
const { context, beat, index, imageAgentInfo, imageRefs } = namedInputs;
|
|
24
26
|
const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
|
|
25
|
-
const imagePath =
|
|
27
|
+
const imagePath = getBeatPngImagePath(context, index);
|
|
26
28
|
const returnValue = {
|
|
27
29
|
imageParams,
|
|
28
|
-
movieFile: beat.moviePrompt ?
|
|
30
|
+
movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
|
|
29
31
|
};
|
|
30
32
|
if (beat.image) {
|
|
31
33
|
const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
|
|
32
34
|
if (plugin) {
|
|
33
35
|
try {
|
|
34
36
|
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
|
|
35
|
-
const processorParams = { beat, context, imagePath, ...htmlStyle(context
|
|
37
|
+
const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
|
|
36
38
|
const path = await plugin.process(processorParams);
|
|
37
39
|
// undefined prompt indicates that image generation is not needed
|
|
38
|
-
return { imagePath: path, ...returnValue };
|
|
40
|
+
return { imagePath: path, referenceImage: path, ...returnValue };
|
|
39
41
|
}
|
|
40
42
|
finally {
|
|
41
43
|
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
@@ -49,17 +51,16 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
49
51
|
return sources.filter((source) => source !== undefined);
|
|
50
52
|
})();
|
|
51
53
|
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
52
|
-
return { ...returnValue, images }; // no image prompt, only movie prompt
|
|
54
|
+
return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
|
|
53
55
|
}
|
|
54
56
|
const prompt = imagePrompt(beat, imageParams.style);
|
|
55
|
-
return { imagePath, prompt, ...returnValue, images };
|
|
57
|
+
return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
|
|
56
58
|
};
|
|
57
59
|
const beat_graph_data = {
|
|
58
60
|
version: 0.5,
|
|
59
61
|
concurrency: 4,
|
|
60
62
|
nodes: {
|
|
61
63
|
context: {},
|
|
62
|
-
imageDirPath: {},
|
|
63
64
|
imageAgentInfo: {},
|
|
64
65
|
movieAgentInfo: {},
|
|
65
66
|
imageRefs: {},
|
|
@@ -71,8 +72,6 @@ const beat_graph_data = {
|
|
|
71
72
|
context: ":context",
|
|
72
73
|
beat: ":beat",
|
|
73
74
|
index: ":__mapIndex",
|
|
74
|
-
suffix: "p",
|
|
75
|
-
imageDirPath: ":imageDirPath",
|
|
76
75
|
imageAgentInfo: ":imageAgentInfo",
|
|
77
76
|
imageRefs: ":imageRefs",
|
|
78
77
|
},
|
|
@@ -93,7 +92,7 @@ const beat_graph_data = {
|
|
|
93
92
|
params: {
|
|
94
93
|
model: ":preprocessor.imageParams.model",
|
|
95
94
|
moderation: ":preprocessor.imageParams.moderation",
|
|
96
|
-
canvasSize: ":context.
|
|
95
|
+
canvasSize: ":context.presentationStyle.canvasSize",
|
|
97
96
|
},
|
|
98
97
|
},
|
|
99
98
|
defaultValue: {},
|
|
@@ -104,24 +103,37 @@ const beat_graph_data = {
|
|
|
104
103
|
inputs: {
|
|
105
104
|
onComplete: ":imageGenerator", // to wait for imageGenerator to finish
|
|
106
105
|
prompt: ":beat.moviePrompt",
|
|
107
|
-
imagePath: ":preprocessor.
|
|
106
|
+
imagePath: ":preprocessor.referenceImage",
|
|
108
107
|
file: ":preprocessor.movieFile",
|
|
109
108
|
studio: ":context.studio", // for cache
|
|
110
109
|
mulmoContext: ":context", // for fileCacheAgentFilter
|
|
111
110
|
index: ":__mapIndex", // for cache
|
|
112
111
|
sessionType: "movie", // for cache
|
|
113
112
|
params: {
|
|
114
|
-
model: ":context.
|
|
113
|
+
model: ":context.presentationStyle.movieParams.model",
|
|
115
114
|
duration: ":beat.duration",
|
|
116
|
-
canvasSize: ":context.
|
|
115
|
+
canvasSize: ":context.presentationStyle.canvasSize",
|
|
117
116
|
},
|
|
118
117
|
},
|
|
119
118
|
defaultValue: {},
|
|
120
119
|
},
|
|
120
|
+
imageFromMovie: {
|
|
121
|
+
if: ":preprocessor.imageFromMovie",
|
|
122
|
+
agent: async (namedInputs) => {
|
|
123
|
+
await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
124
|
+
return { generatedImage: true };
|
|
125
|
+
},
|
|
126
|
+
inputs: {
|
|
127
|
+
onComplete: ":movieGenerator", // to wait for movieGenerator to finish
|
|
128
|
+
imageFile: ":preprocessor.imagePath",
|
|
129
|
+
movieFile: ":preprocessor.movieFile",
|
|
130
|
+
},
|
|
131
|
+
defaultValue: { generatedImage: false },
|
|
132
|
+
},
|
|
121
133
|
output: {
|
|
122
134
|
agent: "copyAgent",
|
|
123
135
|
inputs: {
|
|
124
|
-
onComplete: ":
|
|
136
|
+
onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
|
|
125
137
|
imageFile: ":preprocessor.imagePath",
|
|
126
138
|
movieFile: ":preprocessor.movieFile",
|
|
127
139
|
},
|
|
@@ -138,7 +150,6 @@ const graph_data = {
|
|
|
138
150
|
concurrency: 4,
|
|
139
151
|
nodes: {
|
|
140
152
|
context: {},
|
|
141
|
-
imageDirPath: {},
|
|
142
153
|
imageAgentInfo: {},
|
|
143
154
|
movieAgentInfo: {},
|
|
144
155
|
outputStudioFilePath: {},
|
|
@@ -150,7 +161,6 @@ const graph_data = {
|
|
|
150
161
|
context: ":context",
|
|
151
162
|
imageAgentInfo: ":imageAgentInfo",
|
|
152
163
|
movieAgentInfo: ":movieAgentInfo",
|
|
153
|
-
imageDirPath: ":imageDirPath",
|
|
154
164
|
imageRefs: ":imageRefs",
|
|
155
165
|
},
|
|
156
166
|
isResult: true,
|
|
@@ -185,7 +195,10 @@ const graph_data = {
|
|
|
185
195
|
}
|
|
186
196
|
}
|
|
187
197
|
});
|
|
188
|
-
return {
|
|
198
|
+
return {
|
|
199
|
+
...context,
|
|
200
|
+
studio,
|
|
201
|
+
};
|
|
189
202
|
},
|
|
190
203
|
inputs: {
|
|
191
204
|
array: ":map.output",
|
|
@@ -217,7 +230,6 @@ const googleAuth = async () => {
|
|
|
217
230
|
}
|
|
218
231
|
};
|
|
219
232
|
const graphOption = async (context) => {
|
|
220
|
-
const { studio } = context;
|
|
221
233
|
const agentFilters = [
|
|
222
234
|
{
|
|
223
235
|
name: "fileCacheAgentFilter",
|
|
@@ -225,12 +237,14 @@ const graphOption = async (context) => {
|
|
|
225
237
|
nodeIds: ["imageGenerator", "movieGenerator"],
|
|
226
238
|
},
|
|
227
239
|
];
|
|
240
|
+
const taskManager = new TaskManager(getConcurrency(context));
|
|
228
241
|
const options = {
|
|
229
242
|
agentFilters,
|
|
243
|
+
taskManager,
|
|
230
244
|
};
|
|
231
|
-
const imageAgentInfo =
|
|
245
|
+
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
232
246
|
// We need to get google's auth token only if the google is the text2image provider.
|
|
233
|
-
if (imageAgentInfo.provider === "google" ||
|
|
247
|
+
if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
234
248
|
GraphAILogger.log("google was specified as text2image engine");
|
|
235
249
|
const token = await googleAuth();
|
|
236
250
|
options.config = {
|
|
@@ -246,13 +260,10 @@ const graphOption = async (context) => {
|
|
|
246
260
|
}
|
|
247
261
|
return options;
|
|
248
262
|
};
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
const { outDirPath, imageDirPath } = fileDirs;
|
|
252
|
-
mkdir(`${imageDirPath}/${studio.filename}`);
|
|
253
|
-
const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script, context.dryRun);
|
|
263
|
+
// TODO: unit test
|
|
264
|
+
export const getImageRefs = async (context) => {
|
|
254
265
|
const imageRefs = {};
|
|
255
|
-
const images =
|
|
266
|
+
const images = context.presentationStyle.imageParams?.images;
|
|
256
267
|
if (images) {
|
|
257
268
|
await Promise.all(Object.keys(images).map(async (key) => {
|
|
258
269
|
const image = images[key];
|
|
@@ -283,12 +294,21 @@ const prepareGenerateImages = async (context) => {
|
|
|
283
294
|
return "png"; // default
|
|
284
295
|
}
|
|
285
296
|
})();
|
|
286
|
-
const imagePath =
|
|
297
|
+
const imagePath = getReferenceImagePath(context, key, extension);
|
|
287
298
|
await fs.promises.writeFile(imagePath, buffer);
|
|
288
299
|
imageRefs[key] = imagePath;
|
|
289
300
|
}
|
|
290
301
|
}));
|
|
291
302
|
}
|
|
303
|
+
return imageRefs;
|
|
304
|
+
};
|
|
305
|
+
const prepareGenerateImages = async (context) => {
|
|
306
|
+
const { studio } = context;
|
|
307
|
+
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
308
|
+
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
309
|
+
mkdir(imageProjectDirPath);
|
|
310
|
+
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
|
|
311
|
+
const imageRefs = await getImageRefs(context);
|
|
292
312
|
GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
|
|
293
313
|
const injections = {
|
|
294
314
|
context,
|
|
@@ -297,19 +317,21 @@ const prepareGenerateImages = async (context) => {
|
|
|
297
317
|
agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
|
|
298
318
|
},
|
|
299
319
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
|
|
300
|
-
imageDirPath,
|
|
301
320
|
imageRefs,
|
|
302
321
|
};
|
|
303
322
|
return injections;
|
|
304
323
|
};
|
|
305
|
-
const
|
|
306
|
-
const imageAgentInfo =
|
|
324
|
+
const getConcurrency = (context) => {
|
|
325
|
+
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
307
326
|
if (imageAgentInfo.provider === "openai") {
|
|
308
327
|
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
309
328
|
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
310
329
|
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
311
|
-
|
|
330
|
+
return imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
|
|
312
331
|
}
|
|
332
|
+
return 4;
|
|
333
|
+
};
|
|
334
|
+
const generateImages = async (context, callbacks) => {
|
|
313
335
|
const options = await graphOption(context);
|
|
314
336
|
const injections = await prepareGenerateImages(context);
|
|
315
337
|
const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
|
|
@@ -327,10 +349,13 @@ const generateImages = async (context, callbacks) => {
|
|
|
327
349
|
export const images = async (context, callbacks) => {
|
|
328
350
|
try {
|
|
329
351
|
MulmoStudioContextMethods.setSessionState(context, "image", true);
|
|
330
|
-
await generateImages(context, callbacks);
|
|
352
|
+
const newContext = await generateImages(context, callbacks);
|
|
353
|
+
MulmoStudioContextMethods.setSessionState(context, "image", false);
|
|
354
|
+
return newContext;
|
|
331
355
|
}
|
|
332
|
-
|
|
356
|
+
catch (error) {
|
|
333
357
|
MulmoStudioContextMethods.setSessionState(context, "image", false);
|
|
358
|
+
throw error;
|
|
334
359
|
}
|
|
335
360
|
};
|
|
336
361
|
export const generateBeatImage = async (index, context, callbacks) => {
|