mulmocast 0.0.18 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +13 -11
- package/lib/actions/captions.js +2 -3
- package/lib/actions/images.d.ts +5 -0
- package/lib/actions/images.js +41 -17
- package/lib/actions/movie.js +17 -3
- package/lib/actions/translate.js +3 -3
- package/lib/agents/add_bgm_agent.js +2 -2
- package/lib/agents/combine_audio_files_agent.js +96 -53
- package/lib/agents/image_openai_agent.js +2 -1
- package/lib/agents/validate_schema_agent.d.ts +1 -1
- package/lib/agents/validate_schema_agent.js +3 -3
- package/lib/cli/helpers.js +6 -1
- package/lib/index.browser.d.ts +3 -0
- package/lib/index.browser.js +4 -0
- package/lib/methods/mulmo_presentation_style.js +2 -1
- package/lib/types/schema.d.ts +197 -129
- package/lib/types/schema.js +9 -5
- package/lib/utils/const.d.ts +1 -0
- package/lib/utils/const.js +1 -0
- package/lib/utils/file.d.ts +1 -0
- package/lib/utils/file.js +4 -0
- package/lib/utils/image_plugins/beat.d.ts +1 -0
- package/lib/utils/image_plugins/beat.js +3 -0
- package/lib/utils/image_plugins/chart.d.ts +1 -0
- package/lib/utils/image_plugins/chart.js +2 -0
- package/lib/utils/image_plugins/html_tailwind.d.ts +1 -0
- package/lib/utils/image_plugins/html_tailwind.js +2 -0
- package/lib/utils/image_plugins/image.d.ts +1 -0
- package/lib/utils/image_plugins/image.js +1 -0
- package/lib/utils/image_plugins/index.d.ts +3 -3
- package/lib/utils/image_plugins/index.js +6 -3
- package/lib/utils/image_plugins/markdown.d.ts +1 -0
- package/lib/utils/image_plugins/markdown.js +2 -0
- package/lib/utils/image_plugins/mermaid.d.ts +1 -0
- package/lib/utils/image_plugins/mermaid.js +3 -1
- package/lib/utils/image_plugins/movie.d.ts +1 -0
- package/lib/utils/image_plugins/movie.js +1 -0
- package/lib/utils/image_plugins/source.js +1 -1
- package/lib/utils/image_plugins/text_slide.d.ts +1 -0
- package/lib/utils/image_plugins/text_slide.js +2 -0
- package/lib/utils/image_plugins/utils.d.ts +2 -0
- package/lib/utils/image_plugins/utils.js +3 -0
- package/lib/utils/preprocess.d.ts +3 -1
- package/package.json +15 -2
- package/scripts/templates/image_prompt_only_template.json +33 -0
package/lib/actions/audio.js
CHANGED
@@ -178,9 +178,9 @@ const agentFilters = [
     },
 ];
 export const audioFilePath = (context) => {
-    const …
-    const …
-    return getAudioArtifactFilePath(outDirPath, …
+    const fileName = MulmoStudioContextMethods.getFileName(context);
+    const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+    return getAudioArtifactFilePath(outDirPath, fileName);
 };
 const getConcurrency = (context) => {
     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
@@ -203,9 +203,10 @@ const audioAgents = {
 export const generateBeatAudio = async (index, context, callbacks) => {
     try {
         MulmoStudioContextMethods.setSessionState(context, "audio", true);
-        const …
-        const …
-        const …
+        const fileName = MulmoStudioContextMethods.getFileName(context);
+        const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+        const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+        const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
         mkdir(outDirPath);
         mkdir(audioSegmentDirPath);
         const taskManager = new TaskManager(getConcurrency(context));
@@ -229,12 +230,13 @@ export const generateBeatAudio = async (index, context, callbacks) => {
 export const audio = async (context, callbacks) => {
     try {
         MulmoStudioContextMethods.setSessionState(context, "audio", true);
-        const …
-        const …
+        const fileName = MulmoStudioContextMethods.getFileName(context);
+        const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
+        const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
         const audioArtifactFilePath = audioFilePath(context);
-        const audioSegmentDirPath = resolveDirPath(audioDirPath, …
-        const audioCombinedFilePath = getAudioFilePath(audioDirPath, …
-        const outputStudioFilePath = getOutputStudioFilePath(outDirPath, …
+        const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
+        const audioCombinedFilePath = getAudioFilePath(audioDirPath, fileName, fileName, context.lang);
+        const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
         mkdir(outDirPath);
         mkdir(audioSegmentDirPath);
         const taskManager = new TaskManager(getConcurrency(context));

package/lib/actions/captions.js
CHANGED
@@ -1,6 +1,6 @@
 import { GraphAI, GraphAILogger } from "graphai";
 import * as agents from "@graphai/vanilla";
-import { getHTMLFile } from "../utils/file.js";
+import { getHTMLFile, getCaptionImagePath } from "../utils/file.js";
 import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
 import { MulmoStudioContextMethods, MulmoPresentationStyleMethods } from "../methods/index.js";
 const vanillaAgents = agents.default ?? agents;
@@ -23,10 +23,9 @@ const graph_data = {
             const { beat, context, index } = namedInputs;
             try {
                 MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
-                const imageDirPath = MulmoStudioContextMethods.getImageDirPath(context);
                 const caption = MulmoStudioContextMethods.getCaption(context);
                 const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
-                const imagePath = …
+                const imagePath = getCaptionImagePath(context, index);
                 const template = getHTMLFile("caption");
                 const text = (() => {
                     const multiLingual = context.multiLingual;

package/lib/actions/images.d.ts
CHANGED
@@ -85,6 +85,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
     referenceImage: string;
     prompt: string;
 }>;
+export declare const imagePluginAgent: (namedInputs: {
+    context: MulmoStudioContext;
+    beat: MulmoBeat;
+    index: number;
+}) => Promise<void>;
 export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
 export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
 export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;

package/lib/actions/images.js
CHANGED
@@ -8,8 +8,9 @@ import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getRefe
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
-import { …
+import { findImagePlugin } from "../utils/image_plugins/index.js";
 import { imagePrompt } from "../utils/prompt.js";
+import { defaultOpenAIImageModel } from "../utils/const.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
 // const openai = new OpenAI();
@@ -30,19 +31,13 @@ export const imagePreprocessAgent = async (namedInputs) => {
         movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
     };
     if (beat.image) {
-        const plugin = …
-        if (plugin) {
-            …
-            MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
-            const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
-            const path = await plugin.process(processorParams);
-            // undefined prompt indicates that image generation is not needed
-            return { imagePath: path, referenceImage: path, ...returnValue };
-        }
-        finally {
-            MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
-        }
+        const plugin = findImagePlugin(beat?.image?.type);
+        if (!plugin) {
+            throw new Error(`invalid beat image type: ${beat.image}`);
         }
+        const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
+        // undefined prompt indicates that image generation is not needed
+        return { imagePath: path, referenceImage: path, ...returnValue };
     }
     // images for "edit_image"
     const images = (() => {
@@ -56,6 +51,24 @@ export const imagePreprocessAgent = async (namedInputs) => {
     const prompt = imagePrompt(beat, imageParams.style);
     return { imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
 };
+export const imagePluginAgent = async (namedInputs) => {
+    const { context, beat, index } = namedInputs;
+    const imagePath = getBeatPngImagePath(context, index);
+    const plugin = findImagePlugin(beat?.image?.type);
+    if (!plugin) {
+        throw new Error(`invalid beat image type: ${beat.image}`);
+    }
+    try {
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
+        const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
+        await plugin.process(processorParams);
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
+    }
+    catch (error) {
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
+        throw error;
+    }
+};
 const beat_graph_data = {
     version: 0.5,
     concurrency: 4,
@@ -76,6 +89,17 @@ const beat_graph_data = {
                 imageRefs: ":imageRefs",
             },
         },
+        imagePlugin: {
+            if: ":beat.image",
+            defaultValue: {},
+            agent: imagePluginAgent,
+            inputs: {
+                context: ":context",
+                beat: ":beat",
+                index: ":__mapIndex",
+                onComplete: ":preprocessor",
+            },
+        },
         imageGenerator: {
             if: ":preprocessor.prompt",
             agent: ":imageAgentInfo.agent",
@@ -101,7 +125,7 @@ const beat_graph_data = {
             if: ":preprocessor.movieFile",
             agent: ":movieAgentInfo.agent",
             inputs: {
-                onComplete: ":imageGenerator", // to wait for imageGenerator to finish
+                onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
                 prompt: ":beat.moviePrompt",
                 imagePath: ":preprocessor.referenceImage",
                 file: ":preprocessor.movieFile",
@@ -303,7 +327,7 @@ export const getImageRefs = async (context) => {
     return imageRefs;
 };
 const prepareGenerateImages = async (context) => {
-    const …
+    const fileName = MulmoStudioContextMethods.getFileName(context);
     const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
     const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
     mkdir(imageProjectDirPath);
@@ -316,7 +340,7 @@ const prepareGenerateImages = async (context) => {
         movieAgentInfo: {
             agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
         },
-        outputStudioFilePath: getOutputStudioFilePath(outDirPath, …
+        outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
         imageRefs,
     };
     return injections;
@@ -327,7 +351,7 @@ const getConcurrency = (context) => {
         // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
         // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
        // gpt-image-1:3,000,000 TPM、150 images per minute
-        return imageAgentInfo.imageParams.model === …
+        return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
     }
     return 4;
 };

package/lib/actions/movie.js
CHANGED
@@ -140,10 +140,24 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, capt
         return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
             const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
             const processedVideoId = `${transitionVideoId}_f`;
-            …
-            …
+            let transitionFilter;
+            if (transition.type === "fade") {
+                transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
+            }
+            else if (transition.type === "slideout_left") {
+                transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
+            }
+            else {
+                throw new Error(`Unknown transition type: ${transition.type}`);
+            }
+            ffmpegContext.filterComplex.push(transitionFilter);
             const outputId = `${transitionVideoId}_o`;
-            …
+            if (transition.type === "fade") {
+                ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+            }
+            else if (transition.type === "slideout_left") {
+                ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+            }
             return outputId;
         }, concatVideoId);
     }

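To make the new transition branches concrete, here is a minimal sketch of the two filter strings the "fade" case above would push into ffmpegContext.filterComplex. The stream labels and timing values are hypothetical, not taken from the package.

```js
// Hypothetical labels and timings: transition clip [tv_1], previously merged video
// [base], transition = { type: "fade", duration: 0.5 }, next beat starts at 5.0s,
// so transitionStartTime = 5.0 - 0.05 = 4.95.
const filters = [
  // Fade the transition clip out over 0.5s and shift it to start at 4.95s.
  "[tv_1]format=yuva420p,fade=t=out:d=0.5:alpha=1,setpts=PTS-STARTPTS+4.95/TB[tv_1_f]",
  // Overlay the faded clip on the merged video only while the transition is active.
  "[base][tv_1_f]overlay=enable='between(t,4.95,5.45)'[tv_1_o]",
];
console.log(filters.join(";\n"));
```
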
package/lib/actions/translate.js
CHANGED
@@ -211,9 +211,9 @@ const targetLangs = ["ja", "en"];
 export const translate = async (context, callbacks) => {
     try {
         MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
-        const …
-        const …
-        const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, …
+        const fileName = MulmoStudioContextMethods.getFileName(context);
+        const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
+        const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
         mkdir(outDirPath);
         assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
         const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });

package/lib/agents/add_bgm_agent.js
CHANGED
@@ -11,8 +11,8 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
     const ffmpegContext = FfmpegContextInit();
     const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile);
     const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
-    ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume…
-    ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume…
+    ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
+    ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
     ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
     ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
     ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);

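For reference, a sketch of the complete filter chain this agent assembles once the volume placeholders above are filled in; the input indices, volumes, and durations below are hypothetical.

```js
// Hypothetical values: music at ffmpeg input 0, voice at input 1, bgmVolume = 0.2,
// audioVolume = 1, introPadding = 1s, totalDuration = 30s, outroPadding = 3s.
const filterComplex = [
  "[0:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=0.2[music]",
  "[1:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=1, adelay=1000|1000[voice]",
  "[music][voice]amix=inputs=2:duration=longest[mixed]", // mix bgm and narration
  "[mixed]atrim=start=0:end=30[trimmed]",                // clip to the total duration
  "[trimmed]afade=t=out:st=27:d=3[faded]",               // fade out over the outro padding
];
console.log(filterComplex.join(";\n"));
```
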
package/lib/agents/combine_audio_files_agent.js
CHANGED
@@ -18,87 +18,130 @@ const getPadding = (context, beat, index) => {
     const isClosingGap = index === context.studio.beats.length - 2;
     return isClosingGap ? context.presentationStyle.audioParams.closingPadding : context.presentationStyle.audioParams.padding;
 };
-const getTotalPadding = (padding, movieDuration, audioDuration, duration…
+const getTotalPadding = (padding, movieDuration, audioDuration, duration) => {
     if (movieDuration > 0) {
         return padding + (movieDuration - audioDuration);
     }
     else if (duration && duration > audioDuration) {
         return padding + (duration - audioDuration);
     }
-    else if (canSpillover && duration && audioDuration > duration) {
-        return duration - audioDuration; // negative value to indicate that there is a spill over.
-    }
     return padding;
 };
-const …
-    …
-    const ffmpegContext = FfmpegContextInit();
-    const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
-    // We cannot reuse longSilentId. We need to explicitly split it for each beat.
-    const silentIds = context.studio.beats.map((_, index) => `[ls_${index}]`);
-    ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
-    // First, get the audio durations of all beats, taking advantage of multi-threading capability of ffmpeg.
-    const mediaDurations = await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
+const getMediaDurations = (context) => {
+    return Promise.all(context.studio.beats.map(async (studioBeat, index) => {
         const beat = context.studio.script.beats[index];
         const movieDuration = await getMovieDulation(beat);
         const audioDuration = studioBeat.audioFile ? await ffmpegGetMediaDuration(studioBeat.audioFile) : 0;
         return {
             movieDuration,
             audioDuration,
+            hasMadia: movieDuration + audioDuration > 0,
+            silenceDuration: 0,
         };
     }));
-    …
+};
+const getGroupBeatDurations = (context, group, audioDuration) => {
+    const specifiedSum = group
+        .map((idx) => context.studio.script.beats[idx].duration)
+        .filter((d) => d !== undefined)
+        .reduce((a, b) => a + b, 0);
+    const unspecified = group.filter((idx) => context.studio.script.beats[idx].duration === undefined);
+    const minTotal = 1.0 * unspecified.length;
+    const rest = Math.max(audioDuration - specifiedSum, minTotal);
+    const durationForUnspecified = rest / (unspecified.length || 1);
+    const durations = group.map((idx) => {
+        const duration = context.studio.script.beats[idx].duration;
+        if (duration === undefined) {
+            return durationForUnspecified;
+        }
+        return duration;
+    });
+    return durations;
+};
+const combineAudioFilesAgent = async ({ namedInputs, }) => {
+    const { context, combinedFileName } = namedInputs;
+    const ffmpegContext = FfmpegContextInit();
+    // First, get the audio durations of all beats, taking advantage of multi-threading capability of ffmpeg.
+    const mediaDurations = await getMediaDurations(context);
     const beatDurations = [];
-    context.studio.beats.…
-        const beat = context.studio.script.beats[index];
+    context.studio.script.beats.forEach((beat, index) => {
         const { audioDuration, movieDuration } = mediaDurations[index];
-        …
-        …
-        …
-        const …
-        …
-        …
-        …
-        …
-        …
-        …
-        const …
-        …
-        …
+        // Check if the current beat has media and the next beat does not have media.
+        if (audioDuration > 0) {
+            // Check if the current beat has spilled over audio.
+            const group = [index];
+            for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMadia; i++) {
+                group.push(i);
+            }
+            if (group.length > 1) {
+                const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
+                // Yes, the current beat has spilled over audio.
+                const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
+                if (beatsTotalDuration > audioDuration) {
+                    group.reduce((remaining, idx, iGroup) => {
+                        if (remaining >= groupBeatsDurations[iGroup]) {
+                            return remaining - groupBeatsDurations[iGroup];
+                        }
+                        mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
+                        return 0;
+                    }, audioDuration);
+                }
+                else {
+                    // Last beat gets the rest of the audio.
+                    groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+                }
+                beatDurations.push(...groupBeatsDurations);
             }
             else {
-                …
-                …
-                …
+                // No spilled over audio.
+                assert(beatDurations.length === index, "beatDurations.length !== index");
+                // padding is the amount of audio padding specified in the script.
+                const padding = getPadding(context, beat, index);
+                // totalPadding is the amount of audio padding to be added to the audio file.
+                const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+                const beatDuration = audioDuration + totalPadding;
+                beatDurations.push(beatDuration);
+                if (totalPadding > 0) {
+                    mediaDurations[index].silenceDuration = totalPadding;
                }
            }
        }
-        else {
-            // …
-            …
-            …
-            …
-            …
-            …
-            …
+        else if (movieDuration > 0) {
+            // This beat has only a movie, not audio.
+            assert(beatDurations.length === index, "beatDurations.length !== index");
+            beatDurations.push(movieDuration);
+            mediaDurations[index].silenceDuration = movieDuration;
+        }
+        else if (beatDurations.length === index) {
+            // The current beat has no audio, nor no spilled over audio
+            const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
             beatDurations.push(beatDuration);
-            …
-            …
+            mediaDurations[index].silenceDuration = beatDuration;
+        }
+        // else { Skip this beat if the duration has been already added as a group }
+    });
+    assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
+    // We cannot reuse longSilentId. We need to explicitly split it for each beat.
+    const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
+    if (silentIds.length > 0) {
+        const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+        ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
+    }
+    const inputIds = [];
+    context.studio.beats.forEach((studioBeat, index) => {
+        const { silenceDuration } = mediaDurations[index];
+        const paddingId = `[padding_${index}]`;
+        if (studioBeat.audioFile) {
+            const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
+            inputIds.push(audioId);
+        }
+        if (silenceDuration > 0) {
            const silentId = silentIds.pop();
-            ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${…
+            ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
            inputIds.push(paddingId);
        }
-        return 0;
-    }, 0);
-    assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
-    // We need to "consume" extra silentIds.
-    silentIds.forEach((silentId, index) => {
-        const extraId = `[silent_extra_${index}]`;
-        ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${0.01}${extraId}`);
-        inputIds.push(extraId);
    });
+    assert(silentIds.length === 0, "silentIds.length !== 0");
     // Finally, combine all audio files.
     ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
     await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);

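As a rough trace of the new grouping logic, the sketch below works through getGroupBeatDurations by hand for an invented group of three beats whose first beat carries 10 seconds of audio; none of these numbers come from the package.

```js
// Hypothetical group: beats 3-5 share the audio of beat 3 (audioDuration = 10s).
// Script durations: beat 3 = 2s, beats 4 and 5 unspecified.
const audioDuration = 10;
const durations = [2, undefined, undefined];
const specifiedSum = durations.filter((d) => d !== undefined).reduce((a, b) => a + b, 0); // 2
const unspecified = durations.filter((d) => d === undefined);                             // 2 beats
const minTotal = 1.0 * unspecified.length;                                                // 2
const rest = Math.max(audioDuration - specifiedSum, minTotal);                            // 8
const durationForUnspecified = rest / (unspecified.length || 1);                          // 4
console.log(durations.map((d) => d ?? durationForUnspecified)); // [ 2, 4, 4 ]
// The group total (10s) equals audioDuration, so the last beat is not extended
// and no silence entries are scheduled for this group.
```
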
package/lib/agents/image_openai_agent.js
CHANGED
@@ -1,11 +1,12 @@
 import fs from "fs";
 import path from "path";
 import OpenAI, { toFile } from "openai";
+import { defaultOpenAIImageModel } from "../utils/const.js";
 // https://platform.openai.com/docs/guides/image-generation
 export const imageOpenaiAgent = async ({ namedInputs, params }) => {
     const { prompt, images } = namedInputs;
     const { apiKey, moderation, canvasSize } = params;
-    const model = params.model ?? …
+    const model = params.model ?? defaultOpenAIImageModel;
     const openai = new OpenAI({ apiKey });
     const size = (() => {
         if (model === "gpt-image-1") {

package/lib/agents/validate_schema_agent.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import type …
+import { type AgentFunction, type AgentFunctionInfo, type DefaultConfigData } from "graphai";
 import { MulmoScript } from "../types/index.js";
 import { ZodSchema } from "zod";
 interface ValidateMulmoScriptInputs {

package/lib/agents/validate_schema_agent.js
CHANGED
@@ -1,4 +1,4 @@
-import assert from "…
+import { assert } from "graphai";
 /**
  * Zod schema validation agent
  * Validates if a JSON string conforms to the Zod schema
@@ -6,8 +6,8 @@ import assert from "node:assert";
 export const validateSchemaAgent = async ({ namedInputs, }) => {
     const { text, schema } = namedInputs;
     try {
-        assert(schema, "schema is required");
-        assert(text, "text is required");
+        assert(!!schema, "schema is required");
+        assert(!!text, "text is required");
         const jsonData = JSON.parse(text);
         const parsed = schema.parse(jsonData);
         return {

package/lib/cli/helpers.js
CHANGED
@@ -83,7 +83,12 @@ export const fetchScript = async (isHttpPath, mulmoFilePath, fileOrUrl) => {
 export const getMultiLingual = (multilingualFilePath, beatsLength) => {
     if (fs.existsSync(multilingualFilePath)) {
         const jsonData = readMulmoScriptFile(multilingualFilePath, "ERROR: File does not exist " + multilingualFilePath)?.mulmoData ?? null;
-        …
+        const dataSet = mulmoStudioMultiLingualSchema.parse(jsonData);
+        while (dataSet.length < beatsLength) {
+            dataSet.push({ multiLingualTexts: {} });
+        }
+        dataSet.length = beatsLength;
+        return dataSet;
     }
     return [...Array(beatsLength)].map(() => ({ multiLingualTexts: {} }));
 };

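A small sketch of what the new padding and truncation in getMultiLingual does, assuming a previously saved multilingual file with two entries while the script now has four beats; the data is made up for illustration.

```js
// Hypothetical parsed data: two saved entries, but the script now has four beats.
const dataSet = [{ multiLingualTexts: { en: "hello" } }, { multiLingualTexts: {} }];
const beatsLength = 4;
while (dataSet.length < beatsLength) {
  dataSet.push({ multiLingualTexts: {} }); // pad missing beats with empty entries
}
dataSet.length = beatsLength; // truncate if the saved data has more entries than beats
console.log(dataSet.length); // 4
```
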
package/lib/methods/mulmo_presentation_style.js
CHANGED
@@ -1,5 +1,6 @@
 import "dotenv/config";
 import { text2ImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
+import { defaultOpenAIImageModel } from "../utils/const.js";
 const defaultTextSlideStyles = [
     '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
     "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
@@ -56,7 +57,7 @@ export const MulmoPresentationStyleMethods = {
         // provider and model appropriately.
         const provider = text2ImageProviderSchema.parse(presentationStyle.imageParams?.provider);
         const defaultImageParams = {
-            model: provider === "openai" ? process.env.DEFAULT_OPENAI_IMAGE_MODEL : undefined,
+            model: provider === "openai" ? (process.env.DEFAULT_OPENAI_IMAGE_MODEL ?? defaultOpenAIImageModel) : undefined,
         };
         return {
             provider,