mulmocast 0.0.15 → 0.0.16
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- package/assets/templates/text_and_image.json +6 -0
- package/assets/templates/text_only.json +6 -0
- package/lib/actions/audio.d.ts +3 -1
- package/lib/actions/audio.js +82 -44
- package/lib/actions/captions.js +1 -1
- package/lib/actions/images.d.ts +4 -0
- package/lib/actions/images.js +40 -21
- package/lib/actions/movie.js +19 -19
- package/lib/actions/pdf.js +2 -2
- package/lib/actions/translate.js +1 -1
- package/lib/agents/add_bgm_agent.js +3 -3
- package/lib/agents/combine_audio_files_agent.js +1 -1
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/tavily_agent.d.ts +15 -0
- package/lib/agents/tavily_agent.js +130 -0
- package/lib/cli/commands/audio/builder.d.ts +2 -0
- package/lib/cli/commands/image/builder.d.ts +2 -0
- package/lib/cli/commands/movie/builder.d.ts +2 -0
- package/lib/cli/commands/pdf/builder.d.ts +2 -0
- package/lib/cli/commands/translate/builder.d.ts +2 -0
- package/lib/cli/common.d.ts +2 -0
- package/lib/cli/common.js +6 -0
- package/lib/cli/helpers.d.ts +5 -1
- package/lib/cli/helpers.js +18 -2
- package/lib/methods/index.d.ts +1 -1
- package/lib/methods/index.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +14 -0
- package/lib/methods/mulmo_presentation_style.js +70 -0
- package/lib/methods/mulmo_studio_context.d.ts +14 -0
- package/lib/methods/mulmo_studio_context.js +20 -2
- package/lib/tools/deep_research.d.ts +2 -0
- package/lib/tools/deep_research.js +265 -0
- package/lib/types/schema.d.ts +31 -0
- package/lib/types/schema.js +1 -1
- package/lib/types/type.d.ts +3 -1
- package/lib/utils/ffmpeg_utils.d.ts +1 -0
- package/lib/utils/ffmpeg_utils.js +10 -0
- package/lib/utils/file.d.ts +1 -3
- package/lib/utils/file.js +4 -11
- package/lib/utils/preprocess.js +1 -0
- package/lib/utils/prompt.d.ts +3 -0
- package/lib/utils/prompt.js +52 -0
- package/package.json +4 -3
- package/assets/music/StarsBeyondEx.mp3 +0 -0
package/assets/templates/text_and_image.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "title": "Text and Image",
+  "description": "Template for Text and Image Script.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "scriptName": "image_prompts_template.json"
+}
package/assets/templates/text_only.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "title": "Text Only",
+  "description": "Template for Text Only Script.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "scriptName": "text_only_template.json"
+}
package/lib/actions/audio.d.ts
CHANGED
@@ -1,5 +1,7 @@
 import "dotenv/config";
 import type { CallbackFunction } from "graphai";
-import { MulmoStudioContext } from "../types/index.js";
+import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
+export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
 export declare const audioFilePath: (context: MulmoStudioContext) => string;
+export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
 export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/audio.js
CHANGED
@@ -1,5 +1,6 @@
 import "dotenv/config";
 import { GraphAI } from "graphai";
+import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
 import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
 import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
 import ttsGoogleAgent from "../agents/tts_google_agent.js";
 import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-import {
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { getAudioArtifactFilePath,
+import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -24,7 +25,7 @@ const provider_to_agent = {
   elevenlabs: "ttsElevenlabsAgent",
   mock: "mediaMockAgent",
 };
-const getAudioPath = (context, beat, audioFile, audioDirPath) => {
+const getAudioPath = (context, beat, audioFile) => {
   if (beat.audio?.type === "audio") {
     const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
     if (path) {
@@ -35,34 +36,48 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
   if (beat.text === undefined || beat.text === "") {
     return undefined; // It indicates that the audio is not needed.
   }
-  return
+  return audioFile;
+};
+const getAudioParam = (presentationStyle, beat) => {
+  const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
+  // Use speaker-specific provider if available, otherwise fall back to script-level provider
+  const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+  const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
+  return { voiceId, provider, speechOptions };
+};
+export const getBeatAudioPath = (text, context, beat, lang) => {
+  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+  const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
+  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+  const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
+  const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
+  return getAudioPath(context, beat, audioFile);
 };
 const preprocessor = (namedInputs) => {
-  const { beat, studioBeat, multiLingual, context
-  const { lang } = context;
-  const speaker = context.studio.script.speechParams.speakers[beat.speaker];
-  const voiceId = speaker.voiceId;
-  const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
+  const { beat, studioBeat, multiLingual, context } = namedInputs;
+  const { lang, presentationStyle } = context;
   const text = localizedText(beat, multiLingual, lang);
-
-  const
-  const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
-  const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
-  const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
+  const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+  const audioPath = getBeatAudioPath(text, context, beat, lang);
   studioBeat.audioFile = audioPath;
   const needsTTS = !beat.audio && audioPath !== undefined;
   return {
     ttsAgent: provider_to_agent[provider],
-
+    text,
     voiceId,
     speechOptions,
     audioPath,
-
+    studioBeat,
     needsTTS,
   };
 };
 const graph_tts = {
   nodes: {
+    beat: {},
+    studioBeat: {},
+    multiLingual: {},
+    context: {},
+    __mapIndex: {},
     preprocessor: {
       agent: preprocessor,
       inputs: {
@@ -70,7 +85,6 @@ const graph_tts = {
         studioBeat: ":studioBeat",
         multiLingual: ":multiLingual",
         context: ":context",
-        audioDirPath: ":audioDirPath",
       },
     },
     tts: {
@@ -100,8 +114,6 @@ const graph_data = {
     audioArtifactFilePath: {},
     audioCombinedFilePath: {},
     outputStudioFilePath: {},
-    audioDirPath: {},
-    audioSegmentDirPath: {},
     musicFile: {},
     map: {
       agent: "mapAgent",
@@ -109,8 +121,6 @@
         rows: ":context.studio.script.beats",
         studioBeat: ":context.studio.beats",
        multiLingual: ":context.studio.multiLingual",
-        audioDirPath: ":audioDirPath",
-        audioSegmentDirPath: ":audioSegmentDirPath",
        context: ":context",
       },
       params: {
@@ -122,7 +132,7 @@
     combineFiles: {
       agent: "combineAudioFilesAgent",
       inputs: {
-
+        onComplete: ":map",
         context: ":context",
         combinedFileName: ":audioCombinedFilePath",
       },
@@ -141,7 +151,7 @@
         wait: ":combineFiles",
         voiceFile: ":audioCombinedFilePath",
         outputFile: ":audioArtifactFilePath",
-
+        context: ":context",
         params: {
           musicFile: ":musicFile",
         },
@@ -172,40 +182,68 @@ export const audioFilePath = (context) => {
   const { outDirPath } = fileDirs;
   return getAudioArtifactFilePath(outDirPath, studio.filename);
 };
+const getConcurrency = (context) => {
+  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+  const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
+    const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
+    return provider === "nijivoice" || provider === "elevenlabs";
+  });
+  return hasLimitedConcurrencyProvider ? 1 : 8;
+};
+const audioAgents = {
+  ...vanillaAgents,
+  fileWriteAgent,
+  ttsOpenaiAgent,
+  ttsNijivoiceAgent,
+  ttsGoogleAgent,
+  ttsElevenlabsAgent,
+  addBGMAgent,
+  combineAudioFilesAgent,
+};
+export const generateBeatAudio = async (index, context, callbacks) => {
+  try {
+    MulmoStudioContextMethods.setSessionState(context, "audio", true);
+    const { studio, fileDirs } = context;
+    const { outDirPath, audioDirPath } = fileDirs;
+    const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+    mkdir(outDirPath);
+    mkdir(audioSegmentDirPath);
+    const taskManager = new TaskManager(getConcurrency(context));
+    const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+    graph.injectValue("__mapIndex", index);
+    graph.injectValue("beat", context.studio.script.beats[index]);
+    graph.injectValue("studioBeat", context.studio.beats[index]);
+    graph.injectValue("multiLingual", context.studio.multiLingual);
+    graph.injectValue("context", context);
+    if (callbacks) {
+      callbacks.forEach((callback) => {
+        graph.registerCallback(callback);
+      });
+    }
+    await graph.run();
+  }
+  finally {
+    MulmoStudioContextMethods.setSessionState(context, "audio", false);
+  }
+};
 export const audio = async (context, callbacks) => {
   try {
     MulmoStudioContextMethods.setSessionState(context, "audio", true);
     const { studio, fileDirs, lang } = context;
     const { outDirPath, audioDirPath } = fileDirs;
     const audioArtifactFilePath = audioFilePath(context);
-    const audioSegmentDirPath =
-    const audioCombinedFilePath =
+    const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
+    const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
     mkdir(outDirPath);
     mkdir(audioSegmentDirPath);
-
-    const
-    const provider = speaker.provider ?? studio.script.speechParams.provider;
-    return provider === "nijivoice" || provider === "elevenlabs";
-    });
-    graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
-    const graph = new GraphAI(graph_data, {
-      ...vanillaAgents,
-      fileWriteAgent,
-      ttsOpenaiAgent,
-      ttsNijivoiceAgent,
-      ttsGoogleAgent,
-      ttsElevenlabsAgent,
-      addBGMAgent,
-      combineAudioFilesAgent,
-    }, { agentFilters });
+    const taskManager = new TaskManager(getConcurrency(context));
+    const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
     graph.injectValue("context", context);
     graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
     graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-    graph.injectValue("
-    graph.injectValue("audioDirPath", audioDirPath);
-    graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
+    graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
     if (callbacks) {
       callbacks.forEach((callback) => {
         graph.registerCallback(callback);
package/lib/actions/captions.js
CHANGED
@@ -26,7 +26,7 @@ const graph_data = {
   const { fileDirs } = namedInputs.context;
   const { caption } = context;
   const { imageDirPath } = fileDirs;
-  const { canvasSize } = context.
+  const { canvasSize } = context.presentationStyle;
   const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
   const template = getHTMLFile("caption");
   const text = (() => {
package/lib/actions/images.d.ts
CHANGED
@@ -32,8 +32,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
   };
   movieFile: string | undefined;
   imagePath: string | undefined;
+  referenceImage: string | undefined;
 } | {
+  imagePath: string;
   images: string[];
+  imageFromMovie: boolean;
   imageParams: {
     model?: string | undefined;
     style?: string | undefined;
@@ -81,6 +84,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
   };
   movieFile: string | undefined;
   imagePath: string;
+  referenceImage: string;
   prompt: string;
 }>;
 export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js
CHANGED
@@ -1,22 +1,24 @@
 import dotenv from "dotenv";
 import fs from "fs";
 import { GraphAI, GraphAILogger } from "graphai";
+import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
-import {
+import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { imagePlugins } from "../utils/image_plugins/index.js";
 import { imagePrompt } from "../utils/prompt.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
 // const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
-
+import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
+const htmlStyle = (context, beat) => {
   return {
-    canvasSize:
-    textSlideStyle:
+    canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
+    textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
   };
 };
 export const imagePreprocessAgent = async (namedInputs) => {
@@ -32,10 +34,10 @@ export const imagePreprocessAgent = async (namedInputs) => {
   if (plugin) {
     try {
       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
-      const processorParams = { beat, context, imagePath, ...htmlStyle(context
+      const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
       const path = await plugin.process(processorParams);
       // undefined prompt indicates that image generation is not needed
-      return { imagePath: path, ...returnValue };
+      return { imagePath: path, referenceImage: path, ...returnValue };
     }
     finally {
       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
@@ -49,10 +51,10 @@
     return sources.filter((source) => source !== undefined);
   })();
   if (beat.moviePrompt && !beat.imagePrompt) {
-    return { ...returnValue, images }; // no image prompt, only movie prompt
+    return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
   }
   const prompt = imagePrompt(beat, imageParams.style);
-  return { imagePath, prompt, ...returnValue, images };
+  return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
 };
 const beat_graph_data = {
   version: 0.5,
@@ -93,7 +95,7 @@ const beat_graph_data = {
       params: {
         model: ":preprocessor.imageParams.model",
         moderation: ":preprocessor.imageParams.moderation",
-        canvasSize: ":context.
+        canvasSize: ":context.presentationStyle.canvasSize",
       },
     },
     defaultValue: {},
@@ -104,24 +106,37 @@
       inputs: {
         onComplete: ":imageGenerator", // to wait for imageGenerator to finish
         prompt: ":beat.moviePrompt",
-        imagePath: ":preprocessor.
+        imagePath: ":preprocessor.referenceImage",
        file: ":preprocessor.movieFile",
        studio: ":context.studio", // for cache
        mulmoContext: ":context", // for fileCacheAgentFilter
        index: ":__mapIndex", // for cache
        sessionType: "movie", // for cache
        params: {
-          model: ":context.
+          model: ":context.presentationStyle.movieParams.model",
          duration: ":beat.duration",
-          canvasSize: ":context.
+          canvasSize: ":context.presentationStyle.canvasSize",
        },
      },
      defaultValue: {},
    },
+    imageFromMovie: {
+      if: ":preprocessor.imageFromMovie",
+      agent: async (namedInputs) => {
+        await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
+        return { generatedImage: true };
+      },
+      inputs: {
+        onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+        imageFile: ":preprocessor.imagePath",
+        movieFile: ":preprocessor.movieFile",
+      },
+      defaultValue: { generatedImage: false },
+    },
    output: {
      agent: "copyAgent",
      inputs: {
-        onComplete: ":
+        onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
        imageFile: ":preprocessor.imagePath",
        movieFile: ":preprocessor.movieFile",
      },
@@ -217,7 +232,6 @@ const googleAuth = async () => {
   }
 };
 const graphOption = async (context) => {
-  const { studio } = context;
   const agentFilters = [
     {
       name: "fileCacheAgentFilter",
@@ -225,12 +239,14 @@ const graphOption = async (context) => {
       nodeIds: ["imageGenerator", "movieGenerator"],
     },
   ];
+  const taskManager = new TaskManager(getConcurrency(context));
   const options = {
     agentFilters,
+    taskManager,
   };
-  const imageAgentInfo =
+  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
   // We need to get google's auth token only if the google is the text2image provider.
-  if (imageAgentInfo.provider === "google" ||
+  if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
     GraphAILogger.log("google was specified as text2image engine");
     const token = await googleAuth();
     options.config = {
@@ -250,9 +266,9 @@ const prepareGenerateImages = async (context) => {
   const { studio, fileDirs } = context;
   const { outDirPath, imageDirPath } = fileDirs;
   mkdir(`${imageDirPath}/${studio.filename}`);
-  const imageAgentInfo =
+  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
   const imageRefs = {};
-  const images =
+  const images = context.presentationStyle.imageParams?.images;
   if (images) {
     await Promise.all(Object.keys(images).map(async (key) => {
       const image = images[key];
@@ -302,14 +318,17 @@
   };
   return injections;
 };
-const
-const imageAgentInfo =
+const getConcurrency = (context) => {
+  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
   if (imageAgentInfo.provider === "openai") {
     // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
     // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
     // gpt-image-1:3,000,000 TPM、150 images per minute
-
+    return imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
   }
+  return 4;
+};
+const generateImages = async (context, callbacks) => {
   const options = await graphOption(context);
   const injections = await prepareGenerateImages(context);
   const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
package/lib/actions/movie.js
CHANGED
@@ -1,6 +1,6 @@
 import { GraphAILogger, assert } from "graphai";
 import { mulmoTransitionSchema } from "../types/index.js";
-import {
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -59,22 +59,22 @@ const getOutputOption = (audioId, videoId) => {
     "-b:a 128k", // Audio bitrate
   ];
 };
-const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
+const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
   const start = performance.now();
   const ffmpegContext = FfmpegContextInit();
-  const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+  const missingIndex = context.studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
   if (missingIndex !== -1) {
     GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
     return false;
   }
-  const canvasInfo =
+  const canvasInfo = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
   // Add each image input
   const filterComplexVideoIds = [];
   const filterComplexAudioIds = [];
   const transitionVideoIds = [];
   const beatTimestamps = [];
-  studio.beats.reduce((timestamp, studioBeat, index) => {
-    const beat = studio.script.beats[index];
+  context.studio.beats.reduce((timestamp, studioBeat, index) => {
+    const beat = context.studio.script.beats[index];
     const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
     if (!sourceFile) {
       throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
@@ -83,14 +83,14 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
       throw new Error(`studioBeat.duration is not set: index=${index}`);
     }
     const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
-    const mediaType = studioBeat.movieFile ? "movie" :
+    const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
     const extraPadding = (() => {
       // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
       if (index === 0) {
-        return
+        return context.presentationStyle.audioParams.introPadding;
       }
-      else if (index === studio.beats.length - 1) {
-        return
+      else if (index === context.studio.beats.length - 1) {
+        return context.presentationStyle.audioParams.outroPadding;
       }
       return 0;
     })();
@@ -106,7 +106,7 @@
     else {
       filterComplexVideoIds.push(videoId);
     }
-    if (
+    if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
       const sourceId = filterComplexVideoIds.pop();
       ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
       filterComplexVideoIds.push(`${sourceId}_0`);
@@ -127,16 +127,16 @@
     beatTimestamps.push(timestamp);
     return timestamp + duration;
   }, 0);
-  assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
-  assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
+  assert(filterComplexVideoIds.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
+  assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
   // console.log("*** images", images.audioIds);
   // Concatenate the trimmed images
   const concatVideoId = "concat_video";
-  ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+  ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${context.studio.beats.length}:v=1:a=0[${concatVideoId}]`);
   // Add tranditions if needed
   const mixedVideoId = (() => {
-    if (
-      const transition = mulmoTransitionSchema.parse(
+    if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
+      const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
       return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
         const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
         const processedVideoId = `${transitionVideoId}_f`;
@@ -166,8 +166,8 @@
   await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
   const end = performance.now();
   GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
-  GraphAILogger.info(studio.script.title);
-  GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+  GraphAILogger.info(context.studio.script.title);
+  GraphAILogger.info((context.studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
   return true;
 };
 export const movieFilePath = (context) => {
@@ -181,7 +181,7 @@ export const movie = async (context) => {
   const { outDirPath } = fileDirs;
   const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
   const outputVideoPath = movieFilePath(context);
-  if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+  if (await createVideo(audioArtifactFilePath, outputVideoPath, context, caption)) {
     writingMessage(outputVideoPath);
   }
 }
package/lib/actions/pdf.js
CHANGED
@@ -1,7 +1,7 @@
 import fs from "fs";
 import path from "path";
 import puppeteer from "puppeteer";
-import {
+import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { localizedText, isHttp } from "../utils/utils.js";
 import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
 import { interpolate } from "../utils/markdown.js";
@@ -97,7 +97,7 @@ const getHandoutTemplateData = (isLandscapeImage) => ({
 const generatePDFHTML = async (context, pdfMode, pdfSize) => {
   const { studio, lang = "en" } = context;
   const { multiLingual } = studio;
-  const { width: imageWidth, height: imageHeight } =
+  const { width: imageWidth, height: imageHeight } = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
   const isLandscapeImage = imageWidth > imageHeight;
   const imagePaths = studio.beats.map((beat) => beat.imageFile);
   const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
package/lib/agents/add_bgm_agent.js
CHANGED
@@ -1,11 +1,11 @@
 import { GraphAILogger } from "graphai";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 const addBGMAgent = async ({ namedInputs, params, }) => {
-  const { voiceFile, outputFile,
+  const { voiceFile, outputFile, context } = namedInputs;
   const { musicFile } = params;
   const speechDuration = await ffmpegGetMediaDuration(voiceFile);
-  const introPadding =
-  const outroPadding =
+  const introPadding = context.presentationStyle.audioParams.introPadding;
+  const outroPadding = context.presentationStyle.audioParams.outroPadding;
   const totalDuration = speechDuration + introPadding + outroPadding;
   GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
   const ffmpegContext = FfmpegContextInit();

package/lib/agents/combine_audio_files_agent.js
CHANGED
@@ -27,7 +27,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
     if (index === context.studio.beats.length - 1) {
       return 0;
     }
-    return isClosingGap ? context.
+    return isClosingGap ? context.presentationStyle.audioParams.closingPadding : context.presentationStyle.audioParams.padding;
   })();
   const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
   const totalPadding = await (async () => {
package/lib/agents/index.d.ts
CHANGED
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
 import combineAudioFilesAgent from "./combine_audio_files_agent.js";
 import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
+import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
@@ -12,4 +13,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js
CHANGED
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
 import combineAudioFilesAgent from "./combine_audio_files_agent.js";
 import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
+import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
@@ -13,4 +14,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };