mulmocast 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/lib/actions/captions.js +1 -1
- package/lib/actions/images.d.ts +13 -3
- package/lib/actions/images.js +91 -9
- package/lib/actions/movie.d.ts +2 -2
- package/lib/actions/movie.js +21 -6
- package/lib/agents/add_bgm_agent.js +1 -1
- package/lib/agents/combine_audio_files_agent.js +9 -5
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/movie_replicate_agent.d.ts +23 -0
- package/lib/agents/movie_replicate_agent.js +93 -0
- package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
- package/lib/cli/commands/tool/scripting/builder.js +5 -0
- package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
- package/lib/cli/commands/tool/scripting/handler.js +13 -4
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
- package/lib/cli/helpers.js +8 -3
- package/lib/methods/mulmo_presentation_style.d.ts +2 -1
- package/lib/methods/mulmo_presentation_style.js +21 -2
- package/lib/methods/mulmo_studio_context.js +1 -1
- package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
- package/lib/tools/create_mulmo_script_from_url.js +129 -43
- package/lib/types/schema.d.ts +1261 -165
- package/lib/types/schema.js +47 -1
- package/lib/types/type.d.ts +9 -2
- package/lib/utils/ffmpeg_utils.d.ts +2 -2
- package/lib/utils/ffmpeg_utils.js +9 -4
- package/lib/utils/preprocess.d.ts +47 -6
- package/lib/utils/utils.d.ts +1 -0
- package/lib/utils/utils.js +5 -0
- package/package.json +3 -2
- package/scripts/templates/presentation.json +123 -0
- package/scripts/templates/presentation.json~ +119 -0
package/README.md
CHANGED
````diff
@@ -103,6 +103,11 @@ GOOGLE_PROJECT_ID=your_google_project_id
 
 See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
+#### (Optional) For Movie models
+```bash
+REPLICATE_API_TOKEN=your_replicate_api_key
+```
+
 #### (Optional) For TTS models
 ```bash
 # For Nijivoice TTS
````
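The new environment variable pairs with the Replicate movie provider introduced in this release. A minimal sketch of selecting it in a script's movieParams, inferred from the provider switch and model handling in the diffs below (exact field placement within a MulmoScript may differ):

```json
{
  "movieParams": {
    "provider": "replicate",
    "model": "bytedance/seedance-1-lite"
  }
}
```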
package/lib/actions/captions.js
CHANGED
```diff
@@ -61,7 +61,7 @@ const graph_data = {
     },
 };
 export const captions = async (context, callbacks) => {
-    if (context
+    if (MulmoStudioContextMethods.getCaption(context)) {
         try {
             MulmoStudioContextMethods.setSessionState(context, "caption", true);
             const graph = new GraphAI(graph_data, { ...vanillaAgents });
```
package/lib/actions/images.d.ts
CHANGED
```diff
@@ -8,8 +8,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
     imageRefs: Record<string, string>;
 }) => Promise<{
     imageParams: {
-        model?: string | undefined;
         style?: string | undefined;
+        model?: string | undefined;
         moderation?: string | undefined;
         images?: Record<string, {
             type: "image";
@@ -31,13 +31,19 @@ export declare const imagePreprocessAgent: (namedInputs: {
     movieFile: string | undefined;
     imagePath: string | undefined;
     referenceImage: string | undefined;
+    htmlPrompt?: undefined;
+    htmlSystemPrompt?: undefined;
+} | {
+    imagePath: string;
+    htmlPrompt: string;
+    htmlSystemPrompt: string[];
 } | {
     imagePath: string;
     images: string[];
     imageFromMovie: boolean;
     imageParams: {
-        model?: string | undefined;
         style?: string | undefined;
+        model?: string | undefined;
         moderation?: string | undefined;
         images?: Record<string, {
             type: "image";
@@ -57,11 +63,13 @@ export declare const imagePreprocessAgent: (namedInputs: {
         }> | undefined;
     };
     movieFile: string | undefined;
+    htmlPrompt?: undefined;
+    htmlSystemPrompt?: undefined;
 } | {
     images: string[];
     imageParams: {
-        model?: string | undefined;
         style?: string | undefined;
+        model?: string | undefined;
         moderation?: string | undefined;
         images?: Record<string, {
             type: "image";
@@ -84,6 +92,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
     imagePath: string;
     referenceImage: string;
     prompt: string;
+    htmlPrompt?: undefined;
+    htmlSystemPrompt?: undefined;
 }>;
 export declare const imagePluginAgent: (namedInputs: {
     context: MulmoStudioContext;
```
package/lib/actions/images.js
CHANGED
```diff
@@ -3,17 +3,19 @@ import fs from "fs";
 import { GraphAI, GraphAILogger } from "graphai";
 import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
+import { openAIAgent } from "@graphai/openai_agent";
+import { anthropicAgent } from "@graphai/anthropic_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
+import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { findImagePlugin } from "../utils/image_plugins/index.js";
 import { imagePrompt } from "../utils/prompt.js";
 import { defaultOpenAIImageModel } from "../utils/const.js";
+import { renderHTMLToImage } from "../utils/markdown.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
-// const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
 import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
 const htmlStyle = (context, beat) => {
@@ -39,6 +41,18 @@ export const imagePreprocessAgent = async (namedInputs) => {
         // undefined prompt indicates that image generation is not needed
         return { imagePath: path, referenceImage: path, ...returnValue };
     }
+    if (beat.htmlPrompt) {
+        const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
+        const htmlSystemPrompt = [
+            "Based on the provided information, create a single slide HTML page using Tailwind CSS.",
+            `The view port size is ${context.presentationStyle.canvasSize.width}x${context.presentationStyle.canvasSize.height}. Make sure the HTML fits within the view port.`,
+            "If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
+            "Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
+            "Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
+            "If data is provided, use it effectively to populate the slide.",
+        ];
+        return { imagePath, htmlPrompt, htmlSystemPrompt };
+    }
     // images for "edit_image"
     const images = (() => {
         const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
@@ -69,12 +83,17 @@ export const imagePluginAgent = async (namedInputs) => {
         throw error;
     }
 };
+const htmlImageGeneratorAgent = async (namedInputs) => {
+    const { html, file, canvasSize } = namedInputs;
+    await renderHTMLToImage(html, file, canvasSize.width, canvasSize.height);
+};
 const beat_graph_data = {
     version: 0.5,
     concurrency: 4,
     nodes: {
         context: {},
         imageAgentInfo: {},
+        htmlImageAgentInfo: {},
         movieAgentInfo: {},
         imageRefs: {},
         beat: {},
@@ -100,6 +119,31 @@ const beat_graph_data = {
                 onComplete: ":preprocessor",
             },
         },
+        htmlImageAgent: {
+            if: ":preprocessor.htmlPrompt",
+            defaultValue: {},
+            agent: ":htmlImageAgentInfo.agent",
+            params: {
+                mode: ":htmlImageAgentInfo.model",
+            },
+            inputs: {
+                prompt: ":preprocessor.htmlPrompt",
+                system: ":preprocessor.htmlSystemPrompt",
+            },
+        },
+        htmlImageGenerator: {
+            if: ":preprocessor.htmlPrompt",
+            defaultValue: {},
+            agent: htmlImageGeneratorAgent,
+            inputs: {
+                html: ":htmlImageAgent.text.codeBlockOrRaw()",
+                canvasSize: ":context.presentationStyle.canvasSize",
+                file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
+                mulmoContext: ":context", // for fileCacheAgentFilter
+                index: ":__mapIndex", // for fileCacheAgentFilter
+                sessionType: "image", // for fileCacheAgentFilter
+            },
+        },
         imageGenerator: {
             if: ":preprocessor.prompt",
             agent: ":imageAgentInfo.agent",
@@ -108,7 +152,6 @@ const beat_graph_data = {
                 prompt: ":preprocessor.prompt",
                 images: ":preprocessor.images",
                 file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
-                text: ":preprocessor.prompt", // only for fileCacheAgentFilter
                 force: ":context.force", // only for fileCacheAgentFilter
                 mulmoContext: ":context", // for fileCacheAgentFilter
                 index: ":__mapIndex", // for fileCacheAgentFilter
@@ -157,7 +200,7 @@ const beat_graph_data = {
         output: {
             agent: "copyAgent",
             inputs: {
-                onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
+                onComplete: [":imageFromMovie", ":htmlImageGenerator"], // to wait for imageFromMovie to finish
                 imageFile: ":preprocessor.imagePath",
                 movieFile: ":preprocessor.movieFile",
             },
@@ -175,6 +218,7 @@ const graph_data = {
     nodes: {
         context: {},
         imageAgentInfo: {},
+        htmlImageAgentInfo: {},
         movieAgentInfo: {},
         outputStudioFilePath: {},
         imageRefs: {},
@@ -184,6 +228,7 @@ const graph_data = {
             rows: ":context.studio.script.beats",
             context: ":context",
             imageAgentInfo: ":imageAgentInfo",
+            htmlImageAgentInfo: ":htmlImageAgentInfo",
             movieAgentInfo: ":movieAgentInfo",
             imageRefs: ":imageRefs",
         },
@@ -230,7 +275,6 @@ const graph_data = {
             },
         },
         writeOutput: {
-            // console: { before: true },
             agent: "fileWriteAgent",
             inputs: {
                 file: ":outputStudioFilePath",
@@ -258,7 +302,7 @@ const graphOption = async (context) => {
         {
             name: "fileCacheAgentFilter",
             agent: fileCacheAgentFilter,
-            nodeIds: ["imageGenerator", "movieGenerator"],
+            nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator"],
         },
     ];
     const taskManager = new TaskManager(getConcurrency(context));
@@ -332,13 +376,28 @@ const prepareGenerateImages = async (context) => {
    const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
    mkdir(imageProjectDirPath);
    const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
+    const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
    const imageRefs = await getImageRefs(context);
+    // Determine movie agent based on provider
+    const getMovieAgent = () => {
+        if (context.dryRun)
+            return "mediaMockAgent";
+        const provider = context.presentationStyle.movieParams?.provider ?? "google";
+        switch (provider) {
+            case "replicate":
+                return "movieReplicateAgent";
+            case "google":
+            default:
+                return "movieGoogleAgent";
+        }
+    };
    GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
    const injections = {
        context,
        imageAgentInfo,
+        htmlImageAgentInfo,
        movieAgentInfo: {
-            agent:
+            agent: getMovieAgent(),
        },
        outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
        imageRefs,
@@ -346,6 +405,9 @@ const prepareGenerateImages = async (context) => {
     return injections;
 };
 const getConcurrency = (context) => {
+    if (context.presentationStyle.movieParams?.provider === "replicate") {
+        return 4;
+    }
     const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
     if (imageAgentInfo.provider === "openai") {
         // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -358,7 +420,17 @@ const getConcurrency = (context) => {
 const generateImages = async (context, callbacks) => {
     const options = await graphOption(context);
     const injections = await prepareGenerateImages(context);
-    const graph = new GraphAI(graph_data, {
+    const graph = new GraphAI(graph_data, {
+        ...vanillaAgents,
+        imageGoogleAgent,
+        movieGoogleAgent,
+        movieReplicateAgent,
+        imageOpenaiAgent,
+        mediaMockAgent,
+        fileWriteAgent,
+        openAIAgent,
+        anthropicAgent,
+    }, options);
     Object.keys(injections).forEach((key) => {
         graph.injectValue(key, injections[key]);
     });
@@ -385,7 +457,17 @@ export const images = async (context, callbacks) => {
 export const generateBeatImage = async (index, context, callbacks) => {
     const options = await graphOption(context);
     const injections = await prepareGenerateImages(context);
-    const graph = new GraphAI(beat_graph_data, {
+    const graph = new GraphAI(beat_graph_data, {
+        ...vanillaAgents,
+        imageGoogleAgent,
+        movieGoogleAgent,
+        movieReplicateAgent,
+        imageOpenaiAgent,
+        mediaMockAgent,
+        fileWriteAgent,
+        openAIAgent,
+        anthropicAgent,
+    }, options);
     Object.keys(injections).forEach((key) => {
         if ("outputStudioFilePath" !== key) {
             graph.injectValue(key, injections[key]);
```
package/lib/actions/movie.d.ts
CHANGED
```diff
@@ -1,5 +1,5 @@
-import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
-export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
+import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
+export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption) => {
     videoId: string;
     videoPart: string;
 };
```
package/lib/actions/movie.js
CHANGED
```diff
@@ -1,12 +1,12 @@
 import { GraphAILogger, assert } from "graphai";
-import { mulmoTransitionSchema } from "../types/index.js";
+import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
-export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
+export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption) => {
     const videoId = `v${inputIndex}`;
     const videoFilters = [];
     // Handle different media types
@@ -19,9 +19,19 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
         videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
     }
     // Common filters for all media types
-    videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS"
-    //
-
+    videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS");
+    // Apply scaling based on fill option
+    if (fillOption.style === "aspectFill") {
+        // For aspect fill: scale to fill the canvas completely, cropping if necessary
+        videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`, `crop=${canvasInfo.width}:${canvasInfo.height}`);
+    }
+    else {
+        // For aspect fit: scale to fit within canvas, padding if necessary
+        videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+        // In case of the aspect ratio mismatch, we fill the extra space with black color.
+        `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
+    }
+    videoFilters.push("setsar=1", "format=yuv420p");
     return {
         videoId,
         videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
@@ -95,7 +105,12 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, capt
             return 0;
         })();
         const duration = studioBeat.duration + extraPadding;
-
+        // Get fillOption from merged imageParams (global + beat-specific)
+        const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+        const beatFillOption = beat.movieParams?.fillOption;
+        const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+        const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+        const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption);
         ffmpegContext.filterComplex.push(videoPart);
         if (caption && studioBeat.captionFile) {
             const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
```
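For reference, with a 1280x720 canvas and a 5-second beat, the two new branches of getVideoPart join into filter chains like these (illustrative strings assembled from the code above; which branch applies by default depends on what mulmoFillOptionSchema.parse({}) yields):

```
# fillOption.style === "aspectFill": cover the canvas, cropping any overflow
trim=duration=5,fps=30,setpts=PTS-STARTPTS,scale=w=1280:h=720:force_original_aspect_ratio=increase,crop=1280:720,setsar=1,format=yuv420p

# otherwise (aspect fit): fit inside the canvas, padding the remainder with black
trim=duration=5,fps=30,setpts=PTS-STARTPTS,scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,setsar=1,format=yuv420p
```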
package/lib/agents/add_bgm_agent.js
CHANGED
```diff
@@ -9,7 +9,7 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
     const totalDuration = speechDuration + introPadding + outroPadding;
     GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
     const ffmpegContext = FfmpegContextInit();
-    const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile);
+    const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
     const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
     ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
     ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
```
package/lib/agents/combine_audio_files_agent.js
CHANGED
```diff
@@ -1,4 +1,4 @@
-import { assert } from "graphai";
+import { assert, GraphAILogger } from "graphai";
 import { silent60secPath } from "../utils/file.js";
 import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 const getMovieDulation = async (beat) => {
@@ -77,7 +77,8 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
         // Yes, the current beat has spilled over audio.
         const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-        if (beatsTotalDuration > audioDuration) {
+        if (beatsTotalDuration > audioDuration + 0.01) {
+            // 0.01 is a tolerance to avoid floating point precision issues
             group.reduce((remaining, idx, iGroup) => {
                 if (remaining >= groupBeatsDurations[iGroup]) {
                     return remaining - groupBeatsDurations[iGroup];
@@ -88,7 +89,9 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         }
         else {
             // Last beat gets the rest of the audio.
-
+            if (audioDuration > beatsTotalDuration) {
+                groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+            }
         }
         beatDurations.push(...groupBeatsDurations);
     }
@@ -98,7 +101,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         // padding is the amount of audio padding specified in the script.
         const padding = getPadding(context, beat, index);
         // totalPadding is the amount of audio padding to be added to the audio file.
-        const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+        const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
         const beatDuration = audioDuration + totalPadding;
         beatDurations.push(beatDuration);
         if (totalPadding > 0) {
@@ -124,7 +127,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
     // We cannot reuse longSilentId. We need to explicitly split it for each beat.
     const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
     if (silentIds.length > 0) {
-        const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+        const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
         ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
     }
     const inputIds = [];
@@ -142,6 +145,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         }
     });
     assert(silentIds.length === 0, "silentIds.length !== 0");
+    GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
     // Finally, combine all audio files.
     ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
     await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
```
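The 0.01 tolerance and the hundredths rounding added above both defend against IEEE-754 drift when summing beat durations; for example:

```ts
// Classic float drift: 0.1 + 0.2 evaluates to 0.30000000000000004
0.1 + 0.2 > 0.3;        // true: a bare comparison misfires on nominally equal sums
0.1 + 0.2 > 0.3 + 0.01; // false: the 0.01 tolerance absorbs the error

// Rounding padding to hundredths of a second (10 ms) keeps beatDuration sums stable
Math.round(1.234567 * 100) / 100; // 1.23
```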
package/lib/agents/index.d.ts
CHANGED
```diff
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -13,4 +14,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
```
package/lib/agents/index.js
CHANGED
```diff
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -14,4 +15,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
```
package/lib/agents/movie_replicate_agent.d.ts
ADDED
```diff
@@ -0,0 +1,23 @@
+import type { AgentFunction, AgentFunctionInfo } from "graphai";
+export declare const getAspectRatio: (canvasSize: {
+    width: number;
+    height: number;
+}) => string;
+export type MovieReplicateConfig = {
+    apiKey?: string;
+};
+export declare const movieReplicateAgent: AgentFunction<{
+    model: `${string}/${string}` | undefined;
+    canvasSize: {
+        width: number;
+        height: number;
+    };
+    duration?: number;
+}, {
+    buffer: Buffer;
+}, {
+    prompt: string;
+    imagePath?: string;
+}, MovieReplicateConfig>;
+declare const movieReplicateAgentInfo: AgentFunctionInfo;
+export default movieReplicateAgentInfo;
```
package/lib/agents/movie_replicate_agent.js
ADDED
```diff
@@ -0,0 +1,93 @@
+import { readFileSync } from "fs";
+import { GraphAILogger } from "graphai";
+import Replicate from "replicate";
+async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
+    const replicate = new Replicate({
+        auth: apiKey,
+    });
+    const input = {
+        prompt: prompt,
+        duration: duration,
+        image: undefined,
+        start_image: undefined,
+        aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
+        // resolution: "720p", // only for bytedance/seedance-1-lite
+        // fps: 24, // only for bytedance/seedance-1-lite
+        // camera_fixed: false, // only for bytedance/seedance-1-lite
+        // mode: "standard" // only for kwaivgi/kling-v2.1
+        // negative_prompt: "" // only for kwaivgi/kling-v2.1
+    };
+    // Add image if provided (for image-to-video generation)
+    if (imagePath) {
+        const buffer = readFileSync(imagePath);
+        const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
+        if (model === "kwaivgi/kling-v2.1") {
+            input.start_image = base64Image;
+        }
+        else {
+            input.image = base64Image;
+        }
+    }
+    try {
+        const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
+        // Download the generated video
+        if (output && typeof output === "object" && "url" in output) {
+            const videoUrl = output.url();
+            const videoResponse = await fetch(videoUrl);
+            if (!videoResponse.ok) {
+                throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
+            }
+            const arrayBuffer = await videoResponse.arrayBuffer();
+            return Buffer.from(arrayBuffer);
+        }
+        return undefined;
+    }
+    catch (error) {
+        GraphAILogger.info("Replicate generation error:", error);
+        throw error;
+    }
+}
+export const getAspectRatio = (canvasSize) => {
+    if (canvasSize.width > canvasSize.height) {
+        return "16:9";
+    }
+    else if (canvasSize.width < canvasSize.height) {
+        return "9:16";
+    }
+    else {
+        return "1:1";
+    }
+};
+export const movieReplicateAgent = async ({ namedInputs, params, config }) => {
+    const { prompt, imagePath } = namedInputs;
+    const aspectRatio = getAspectRatio(params.canvasSize);
+    const duration = params.duration ?? 5;
+    const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
+    if (!apiKey) {
+        throw new Error("REPLICATE_API_TOKEN environment variable is required");
+    }
+    try {
+        const buffer = await generateMovie(params.model, apiKey, prompt, imagePath, aspectRatio, duration);
+        if (buffer) {
+            return { buffer };
+        }
+        throw new Error("ERROR: generateMovie returned undefined");
+    }
+    catch (error) {
+        GraphAILogger.info("Failed to generate movie:", error.message);
+        throw error;
+    }
+};
+const movieReplicateAgentInfo = {
+    name: "movieReplicateAgent",
+    agent: movieReplicateAgent,
+    mock: movieReplicateAgent,
+    samples: [],
+    description: "Replicate Movie agent using seedance-1-lite",
+    category: ["movie"],
+    author: "Receptron Team",
+    repository: "https://github.com/receptron/mulmocast-cli/",
+    license: "MIT",
+    environmentVariables: ["REPLICATE_API_TOKEN"],
+};
+export default movieReplicateAgentInfo;
```
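In normal use this agent is selected automatically once movieParams.provider is "replicate". For testing, a direct-invocation sketch based on the argument shape in the .d.ts above (the import path and prompt are illustrative, and only the fields the agent actually destructures are passed):

```ts
import { movieReplicateAgent } from "mulmocast/lib/agents/movie_replicate_agent.js"; // path is illustrative

const { buffer } = await movieReplicateAgent({
  namedInputs: { prompt: "aerial shot over a coastline at sunset" }, // imagePath may be added for image-to-video
  params: {
    model: "bytedance/seedance-1-lite",       // Replicate owner/name model id (also the fallback default)
    canvasSize: { width: 1280, height: 720 }, // landscape, so aspect_ratio becomes "16:9"
    duration: 5,
  },
  config: { apiKey: process.env.REPLICATE_API_TOKEN },
});
// buffer now holds the downloaded video bytes
```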
package/lib/cli/commands/tool/scripting/builder.d.ts
CHANGED
```diff
@@ -5,6 +5,8 @@ export declare const builder: (yargs: Argv) => Argv<{
     b: string | undefined;
 } & {
     u: string[] | never[];
+} & {
+    "input-file": string | undefined;
 } & {
     i: boolean | undefined;
 } & {
@@ -14,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
 } & {
     s: string;
 } & {
-    llm: "
+    llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
 } & {
     llm_model: string | undefined;
 }>;
```
package/lib/cli/commands/tool/scripting/handler.js
CHANGED
```diff
@@ -1,18 +1,23 @@
 import { getBaseDirPath, getFullPath } from "../../../../utils/file.js";
 import { outDirName, cacheDirName } from "../../../../utils/const.js";
 import { getUrlsIfNeeded, selectTemplate } from "../../../../utils/inquirer.js";
-import { createMulmoScriptFromUrl } from "../../../../tools/create_mulmo_script_from_url.js";
+import { createMulmoScriptFromUrl, createMulmoScriptFromFile } from "../../../../tools/create_mulmo_script_from_url.js";
 import { createMulmoScriptInteractively } from "../../../../tools/create_mulmo_script_interactively.js";
 import { setGraphAILogger } from "../../../../cli/helpers.js";
 export const handler = async (argv) => {
-    const { o: outdir, b: basedir, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
+    const { o: outdir, b: basedir, "input-file": inputFile, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
     let { t: template } = argv;
     const urls = argv.u || [];
     const baseDirPath = getBaseDirPath(basedir);
     const outDirPath = getFullPath(baseDirPath, outdir ?? outDirName);
     const cacheDirPath = getFullPath(outDirPath, cache ?? cacheDirName);
     if (!template) {
-
+        if (interactive) {
+            template = await selectTemplate();
+        }
+        else {
+            template = "business";
+        }
     }
     setGraphAILogger(verbose, {
         baseDirPath,
@@ -22,13 +27,17 @@ export const handler = async (argv) => {
         urls,
         interactive,
         filename,
+        inputFile,
         llm,
         llm_model,
     });
-    const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm };
+    const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm, verbose };
     if (interactive) {
         await createMulmoScriptInteractively(context);
     }
+    if (inputFile) {
+        await createMulmoScriptFromFile(inputFile, context);
+    }
     else {
         context.urls = await getUrlsIfNeeded(urls);
         await createMulmoScriptFromUrl(context);
```