mulmocast 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/assets/templates/ghibli_shorts.json +1 -1
- package/lib/actions/images.d.ts +4 -0
- package/lib/actions/images.js +27 -26
- package/lib/actions/movie.js +80 -91
- package/lib/agents/add_bgm_agent.js +15 -2
- package/lib/agents/combine_audio_files_agent.js +3 -3
- package/lib/types/schema.d.ts +99 -74
- package/lib/types/schema.js +22 -7
- package/lib/types/type.d.ts +4 -2
- package/lib/utils/context.d.ts +3 -2
- package/lib/utils/context.js +1 -0
- package/lib/utils/file.js +8 -0
- package/lib/utils/filters.js +8 -3
- package/lib/utils/preprocess.d.ts +1 -1
- package/lib/utils/utils.d.ts +1 -0
- package/lib/utils/utils.js +14 -0
- package/package.json +12 -12
- package/scripts/templates/voice_over.json +60 -0
package/README.md
CHANGED

@@ -82,6 +82,16 @@ brew install ffmpeg
  # Visit https://ffmpeg.org/download.html
  ```

+ You can also use [`Dockerfile`](./Dockerfile) which helps you install the pre-requisits.
+ ```
+ docker build -t mulmo-cli .
+ ```
+
+ You can use the Docker image like this:
+ ```
+ docker run -e OPENAI_API_KEY=<your_openai_api_key> -it mulmo-cli mulmo tool scripting -i -t children_book -o ./ -s story
+ ```
+
  ## Configuration

  Create a `.env` file in your project directory with the following API keys:
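Because the Configuration section that follows stores the same keys in a `.env` file, Docker's standard `--env-file` flag is a convenient alternative to passing `-e OPENAI_API_KEY` by hand. The invocation below is a hypothetical sketch, not part of the README diff:

```
# Build the image as documented above, then load every key from .env at run time.
docker build -t mulmo-cli .
docker run --env-file .env -it mulmo-cli mulmo tool scripting -i -t children_book -o ./ -s story
```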
package/assets/templates/ghibli_shorts.json
CHANGED

@@ -1,5 +1,5 @@
  {
-     "title": "Ghibli
+     "title": "Ghibli style for YouTube Shorts",
      "description": "Template for Ghibli-style comic presentation.",
      "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
      "presentationStyle": {
package/lib/actions/images.d.ts
CHANGED

@@ -32,10 +32,12 @@ export declare const imagePreprocessAgent: (namedInputs: {
      imagePath: string | undefined;
      referenceImage: string | undefined;
      htmlPrompt?: undefined;
+     htmlPath?: undefined;
      htmlImageSystemPrompt?: undefined;
  } | {
      imagePath: string;
      htmlPrompt: string;
+     htmlPath: string;
      htmlImageSystemPrompt: string[];
  } | {
      imagePath: string;
@@ -65,6 +67,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
      };
      movieFile: string | undefined;
      htmlPrompt?: undefined;
+     htmlPath?: undefined;
      htmlImageSystemPrompt?: undefined;
  } | {
      images: string[];
@@ -96,6 +99,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
      referenceImage: string;
      prompt: string;
      htmlPrompt?: undefined;
+     htmlPath?: undefined;
      htmlImageSystemPrompt?: undefined;
  }>;
  export declare const imagePluginAgent: (namedInputs: {
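The new `htmlPath` member is added to exactly the union branches that already carry `htmlPrompt`, so narrowing on one yields the other. A minimal hypothetical consumer (the import path and `namedInputs` shape are assumptions, not part of this diff):

```ts
import { imagePreprocessAgent } from "mulmocast"; // assumed entry point; adjust to the package's actual exports

const logHtmlPlan = async (namedInputs: Parameters<typeof imagePreprocessAgent>[0]) => {
  const pre = await imagePreprocessAgent(namedInputs);
  if (pre.htmlPrompt !== undefined) {
    // In 0.1.2 this branch also exposes htmlPath, the ".html" sibling of imagePath.
    console.log(`HTML for ${pre.imagePath} will be cached at ${pre.htmlPath}`);
  }
};
```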
package/lib/actions/images.js
CHANGED

@@ -10,7 +10,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { findImagePlugin } from "../utils/image_plugins/index.js";
- import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
+ import { userAssert, settings2GraphAIConfig, getExtention } from "../utils/utils.js";
  import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
  import { defaultOpenAIImageModel } from "../utils/const.js";
  import { renderHTMLToImage } from "../utils/markdown.js";
@@ -44,7 +44,8 @@ export const imagePreprocessAgent = async (namedInputs) => {
      }
      if (beat.htmlPrompt) {
          const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
-
+         const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
+         return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
      }
      // images for "edit_image"
      const images = (() => {
@@ -77,11 +78,8 @@ export const imagePluginAgent = async (namedInputs) => {
      }
  };
  const htmlImageGeneratorAgent = async (namedInputs) => {
-     const {
-
-     const htmlFile = file.replace(/\.[^/.]+$/, ".html");
-     await fs.promises.writeFile(htmlFile, html, "utf8");
-     await renderHTMLToImage(html, file, canvasSize.width, canvasSize.height);
+     const { file, canvasSize, htmlText } = namedInputs;
+     await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
  };
  const beat_graph_data = {
      version: 0.5,
@@ -124,14 +122,33 @@ const beat_graph_data = {
              model: ":htmlImageAgentInfo.model",
              max_tokens: ":htmlImageAgentInfo.max_tokens",
          },
+         file: ":preprocessor.htmlPath", // only for fileCacheAgentFilter
+         mulmoContext: ":context", // for fileCacheAgentFilter
+         index: ":__mapIndex", // for fileCacheAgentFilter
+         sessionType: "html", // for fileCacheAgentFilter
      },
  },
+ htmlReader: {
+     if: ":preprocessor.htmlPrompt",
+     agent: async (namedInputs) => {
+         const html = await fs.promises.readFile(namedInputs.htmlPath, "utf8");
+         return { html };
+     },
+     inputs: {
+         onComplete: ":htmlImageAgent", // to wait for htmlImageAgent to finish
+         htmlPath: ":preprocessor.htmlPath",
+     },
+     output: {
+         htmlText: ".html.codeBlockOrRaw()",
+     },
+     defaultValue: {},
+ },
  htmlImageGenerator: {
      if: ":preprocessor.htmlPrompt",
      defaultValue: {},
      agent: htmlImageGeneratorAgent,
      inputs: {
-
+         htmlText: ":htmlReader.htmlText",
          canvasSize: ":context.presentationStyle.canvasSize",
          file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
          mulmoContext: ":context", // for fileCacheAgentFilter
@@ -295,7 +312,7 @@ const graphOption = async (context, settings) => {
      {
          name: "fileCacheAgentFilter",
          agent: fileCacheAgentFilter,
-         nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator"],
+         nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator", "htmlImageAgent"],
      },
  ];
  const taskManager = new TaskManager(getConcurrency(context));
@@ -339,23 +356,7 @@ export const getImageRefs = async (context) => {
      }
      const buffer = Buffer.from(await response.arrayBuffer());
      // Detect file extension from Content-Type header or URL
-     const extension = (()
-         const contentType = response.headers.get("content-type");
-         if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
-             return "jpg";
-         }
-         else if (contentType?.includes("png")) {
-             return "png";
-         }
-         else {
-             // Fall back to URL extension
-             const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
-             if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
-                 return urlExtension === "jpeg" ? "jpg" : urlExtension;
-             }
-             return "png"; // default
-         }
-     })();
+     const extension = getExtention(response.headers.get("content-type"), image.source.url);
      const imagePath = getReferenceImagePath(context, key, extension);
      await fs.promises.writeFile(imagePath, buffer);
      imageRefs[key] = imagePath;
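The inline Content-Type/URL extension detection removed from `getImageRefs` is replaced by a `getExtention` helper imported from `../utils/utils.js` (the file list above shows utils.js gaining 14 lines). Its implementation is not shown in this diff; reconstructed from the deleted block, it is likely close to the following sketch:

```ts
// Sketch only: reconstructed from the logic removed from getImageRefs();
// the shipped utils.js implementation may differ in detail.
export const getExtention = (contentType: string | null, url: string): string => {
  if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
    return "jpg";
  }
  if (contentType?.includes("png")) {
    return "png";
  }
  // Fall back to the URL extension.
  const urlExtension = url.split(".").pop()?.toLowerCase();
  if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
    return urlExtension === "jpeg" ? "jpg" : urlExtension;
  }
  return "png"; // default
};
```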
package/lib/actions/movie.js
CHANGED

@@ -2,7 +2,7 @@ import { GraphAILogger, assert } from "graphai";
  import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
- import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
+ import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput, } from "../utils/ffmpeg_utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
@@ -77,6 +77,63 @@ const getOutputOption = (audioId, videoId) => {
          "-b:a 128k", // Audio bitrate
      ];
  };
+ const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
+     const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
+     if (caption && beatsWithCaptions.length > 0) {
+         const introPadding = context.presentationStyle.audioParams.introPadding;
+         return beatsWithCaptions.reduce((acc, beat, index) => {
+             const { startAt, duration, captionFile } = beat;
+             if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
+                 const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
+                 const compositeVideoId = `oc${index}`;
+                 ffmpegContext.filterComplex.push(`[${acc}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
+                 return compositeVideoId;
+             }
+             return acc;
+         }, concatVideoId);
+     }
+     return concatVideoId;
+ };
+ const addTransitionEffects = (ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps) => {
+     if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
+         const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
+         return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
+             const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
+             const processedVideoId = `${transitionVideoId}_f`;
+             let transitionFilter;
+             if (transition.type === "fade") {
+                 transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
+             }
+             else if (transition.type === "slideout_left") {
+                 transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
+             }
+             else {
+                 throw new Error(`Unknown transition type: ${transition.type}`);
+             }
+             ffmpegContext.filterComplex.push(transitionFilter);
+             const outputId = `${transitionVideoId}_o`;
+             if (transition.type === "fade") {
+                 ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+             }
+             else if (transition.type === "slideout_left") {
+                 ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+             }
+             return outputId;
+         }, captionedVideoId);
+     }
+     return captionedVideoId;
+ };
+ const mixAudiosFromMovieBeats = (ffmpegContext, artifactAudioId, audioIdsFromMovieBeats) => {
+     if (audioIdsFromMovieBeats.length > 0) {
+         const mainAudioId = "mainaudio";
+         const compositeAudioId = "composite";
+         const audioIds = audioIdsFromMovieBeats.map((id) => `[${id}]`).join("");
+         FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
+         ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${audioIdsFromMovieBeats.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
+         return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
+     }
+     return artifactAudioId;
+ };
  const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
      const caption = MulmoStudioContextMethods.getCaption(context);
      const start = performance.now();
@@ -94,26 +151,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
      }
      const canvasInfo = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
      // Add each image input
-     const
-     const
+     const videoIdsForBeats = [];
+     const audioIdsFromMovieBeats = [];
      const transitionVideoIds = [];
      const beatTimestamps = [];
      context.studio.beats.reduce((timestamp, studioBeat, index) => {
          const beat = context.studio.script.beats[index];
          if (beat.image?.type === "voice_over") {
-
+             videoIdsForBeats.push(undefined);
              beatTimestamps.push(timestamp);
              return timestamp; // Skip voice-over beats.
          }
          const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
-
-
-         }
-         if (!studioBeat.duration) {
-             throw new Error(`studioBeat.duration is not set: index=${index}`);
-         }
-         const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
-         const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
+         assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
+         assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
          const extraPadding = (() => {
              // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
              if (index === 0) {
@@ -131,111 +182,49 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
          const beatFillOption = beat.movieParams?.fillOption;
          const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
          const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+         const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
+         const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
          const speed = beat.movieParams?.speed ?? 1.0;
          const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
          ffmpegContext.filterComplex.push(videoPart);
-         /*
-         if (caption && studioBeat.captionFile) {
-             // NOTE: This works for normal beats, but not for voice-over beats.
-             const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
-             const compositeVideoId = `c${index}`;
-             ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
-             filterComplexVideoIds.push(compositeVideoId);
-         } else {
-         }
-         */
-         filterComplexVideoIds.push(videoId);
          if (context.presentationStyle.movieParams?.transition && index < context.studio.beats.length - 1) {
-
-             ffmpegContext.filterComplex.push(`[${
-
+             // NOTE: We split the video into two parts for transition.
+             ffmpegContext.filterComplex.push(`[${videoId}]split=2[${videoId}_0][${videoId}_1]`);
+             videoIdsForBeats.push(`${videoId}_0`);
              if (mediaType === "movie") {
                  // For movie beats, extract the last frame for transition
-                 ffmpegContext.filterComplex.push(`[${
-                 transitionVideoIds.push(`${
+                 ffmpegContext.filterComplex.push(`[${videoId}_1]reverse,select='eq(n,0)',reverse,tpad=stop_mode=clone:stop_duration=${duration},fps=30,setpts=PTS-STARTPTS[${videoId}_2]`);
+                 transitionVideoIds.push(`${videoId}_2`);
              }
              else {
-                 transitionVideoIds.push(`${
+                 transitionVideoIds.push(`${videoId}_1`);
             }
          }
+         else {
+             videoIdsForBeats.push(videoId);
+         }
          // NOTE: We don't support audio if the speed is not 1.0.
          if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0 && speed === 1.0) {
              const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
-
+             audioIdsFromMovieBeats.push(audioId);
              ffmpegContext.filterComplex.push(audioPart);
          }
          beatTimestamps.push(timestamp);
          return timestamp + duration;
      }, 0);
-     assert(
+     assert(videoIdsForBeats.length === context.studio.beats.length, "videoIds.length !== studio.beats.length");
      assert(beatTimestamps.length === context.studio.beats.length, "beatTimestamps.length !== studio.beats.length");
      // console.log("*** images", images.audioIds);
      // Concatenate the trimmed images
      const concatVideoId = "concat_video";
-     const videoIds =
+     const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
      ffmpegContext.filterComplex.push(`${videoIds.map((id) => `[${id}]`).join("")}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`);
-
-     const
-     const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
-     if (caption && beatsWithCaptions.length > 0) {
-         const introPadding = context.presentationStyle.audioParams.introPadding;
-         return beatsWithCaptions.reduce((acc, beat, index) => {
-             const { startAt, duration, captionFile } = beat;
-             if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
-                 const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
-                 const compositeVideoId = `oc${index}`;
-                 ffmpegContext.filterComplex.push(`[${acc}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
-                 return compositeVideoId;
-             }
-             return acc;
-         }, concatVideoId);
-     }
-     return concatVideoId;
-     })();
-     // Add tranditions if needed
-     const mixedVideoId = (() => {
-         if (context.presentationStyle.movieParams?.transition && transitionVideoIds.length > 0) {
-             const transition = mulmoTransitionSchema.parse(context.presentationStyle.movieParams.transition);
-             return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
-                 const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
-                 const processedVideoId = `${transitionVideoId}_f`;
-                 let transitionFilter;
-                 if (transition.type === "fade") {
-                     transitionFilter = `[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
-                 }
-                 else if (transition.type === "slideout_left") {
-                     transitionFilter = `[${transitionVideoId}]format=yuva420p,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`;
-                 }
-                 else {
-                     throw new Error(`Unknown transition type: ${transition.type}`);
-                 }
-                 ffmpegContext.filterComplex.push(transitionFilter);
-                 const outputId = `${transitionVideoId}_o`;
-                 if (transition.type === "fade") {
-                     ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
-                 }
-                 else if (transition.type === "slideout_left") {
-                     ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=x='-(t-${transitionStartTime})*W/${transition.duration}':y=0:enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
-                 }
-                 return outputId;
-             }, captionedVideoId);
-         }
-         return captionedVideoId;
-     })();
+     const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
+     const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
      GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
      const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
      const artifactAudioId = `${audioIndex}:a`;
-     const ffmpegContextAudioId = (
-     if (filterComplexAudioIds.length > 0) {
-         const mainAudioId = "mainaudio";
-         const compositeAudioId = "composite";
-         const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
-         FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
-         ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
-         return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
-     }
-     return artifactAudioId;
-     })();
+     const ffmpegContextAudioId = mixAudiosFromMovieBeats(ffmpegContext, artifactAudioId, audioIdsFromMovieBeats);
      // GraphAILogger.debug("filterComplex", ffmpegContext.filterComplex);
      await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
      const end = performance.now();
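The extracted `addCaptions`, `addTransitionEffects`, and `mixAudiosFromMovieBeats` helpers emit the same filter_complex entries the old inline IIFEs did. As a rough illustration of the resulting graph, assume two beats where beat 0 is an image with a caption (caption input index 3, startAt 0, duration 4, introPadding 1) and a 0.5-second fade into beat 1 at t = 5; the `v0`/`v1` labels and every number below are made up, since the real stream names come from `getVideoPart`, which is not shown in this diff:

```ts
// Stand-in for the FfmpegContext used in movie.js (shape assumed for illustration).
const ffmpegContext = { filterComplex: [] as string[] };

// Per-beat stage: split beat 0 so its tail can be reused for the transition.
ffmpegContext.filterComplex.push("[v0]split=2[v0_0][v0_1]");
// Concatenate the per-beat streams (voice-over beats are filtered out first).
ffmpegContext.filterComplex.push("[v0_0][v1]concat=n=2:v=1:a=0[concat_video]");
// addCaptions: show the caption from startAt + introPadding to startAt + duration + introPadding.
ffmpegContext.filterComplex.push("[concat_video][3:v]overlay=format=auto:enable='between(t,1,5)'[oc0]");
// addTransitionEffects (fade): start 0.05s before the next beat to avoid flickering.
ffmpegContext.filterComplex.push("[v0_1]format=yuva420p,fade=t=out:d=0.5:alpha=1,setpts=PTS-STARTPTS+4.95/TB[v0_1_f]");
ffmpegContext.filterComplex.push("[oc0][v0_1_f]overlay=enable='between(t,4.95,5.45)'[v0_1_o]");

console.log(ffmpegContext.filterComplex.join("\n"));
```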
package/lib/agents/add_bgm_agent.js
CHANGED

@@ -1,8 +1,15 @@
+ import fs from "fs";
  import { GraphAILogger } from "graphai";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
  const addBGMAgent = async ({ namedInputs, params, }) => {
      const { voiceFile, outputFile, context } = namedInputs;
      const { musicFile } = params;
+     if (!fs.existsSync(voiceFile)) {
+         throw new Error(`AddBGMAgent voiceFile not exist: ${voiceFile}`);
+     }
+     if (!musicFile.match(/^http/) && !fs.existsSync(musicFile)) {
+         throw new Error(`AddBGMAgent musicFile not exist: ${musicFile}`);
+     }
      const speechDuration = await ffmpegGetMediaDuration(voiceFile);
      const introPadding = context.presentationStyle.audioParams.introPadding;
      const outroPadding = context.presentationStyle.audioParams.outroPadding;
@@ -16,8 +23,14 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
      ffmpegContext.filterComplex.push(`[music][voice]amix=inputs=2:duration=longest[mixed]`);
      ffmpegContext.filterComplex.push(`[mixed]atrim=start=0:end=${totalDuration}[trimmed]`);
      ffmpegContext.filterComplex.push(`[trimmed]afade=t=out:st=${totalDuration - outroPadding}:d=${outroPadding}[faded]`);
-
-
+     try {
+         await FfmpegContextGenerateOutput(ffmpegContext, outputFile, ["-map", "[faded]"]);
+         return outputFile;
+     }
+     catch (e) {
+         GraphAILogger.log(e);
+         throw new Error(`AddBGMAgent ffmpeg run Error`);
+     }
  };
  const addBGMAgentInfo = {
      name: "addBGMAgent",
package/lib/agents/combine_audio_files_agent.js
CHANGED

@@ -82,7 +82,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
      if (group.length > 1) {
          group.reduce((remaining, idx, iGroup) => {
              const subBeatDurations = mediaDurations[idx];
-             userAssert(subBeatDurations.audioDuration <= remaining, `
+             userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
              if (iGroup === group.length - 1) {
                  beatDurations.push(remaining);
                  subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
@@ -94,10 +94,10 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
              if (voiceStartAt) {
                  const remainingDuration = movieDuration - voiceStartAt;
                  const duration = remaining - remainingDuration;
-                 userAssert(duration >= 0, `duration(${duration}) < 0`);
+                 userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
                  beatDurations.push(duration);
                  subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
-                 userAssert(subBeatDurations.silenceDuration >= 0, `
+                 userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
                  return remainingDuration;
              }
              beatDurations.push(subBeatDurations.audioDuration);