mulmocast 0.0.22 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/lib/actions/captions.js +1 -1
- package/lib/actions/images.d.ts +7 -3
- package/lib/actions/images.js +61 -17
- package/lib/actions/movie.d.ts +2 -2
- package/lib/actions/movie.js +21 -6
- package/lib/agents/combine_audio_files_agent.js +9 -5
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/movie_replicate_agent.d.ts +23 -0
- package/lib/agents/movie_replicate_agent.js +93 -0
- package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
- package/lib/cli/commands/tool/scripting/builder.js +5 -0
- package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
- package/lib/cli/commands/tool/scripting/handler.js +13 -4
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
- package/lib/cli/helpers.js +8 -3
- package/lib/methods/mulmo_presentation_style.d.ts +2 -1
- package/lib/methods/mulmo_presentation_style.js +21 -2
- package/lib/methods/mulmo_studio_context.js +1 -1
- package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
- package/lib/tools/create_mulmo_script_from_url.js +129 -43
- package/lib/types/schema.d.ts +1123 -163
- package/lib/types/schema.js +38 -1
- package/lib/types/type.d.ts +9 -2
- package/lib/utils/ffmpeg_utils.d.ts +1 -1
- package/lib/utils/ffmpeg_utils.js +2 -2
- package/lib/utils/preprocess.d.ts +41 -6
- package/lib/utils/utils.d.ts +1 -0
- package/lib/utils/utils.js +5 -0
- package/package.json +3 -2
- package/scripts/templates/presentation.json +123 -0
- package/scripts/templates/presentation.json~ +119 -0
package/README.md
CHANGED
|
@@ -103,6 +103,11 @@ GOOGLE_PROJECT_ID=your_google_project_id
|
|
|
103
103
|
|
|
104
104
|
See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
|
|
105
105
|
|
|
106
|
+
#### (Optional) For Movie models
|
|
107
|
+
```bash
|
|
108
|
+
REPLICATE_API_TOKEN=your_replicate_api_key
|
|
109
|
+
```
|
|
110
|
+
|
|
106
111
|
#### (Optional) For TTS models
|
|
107
112
|
```bash
|
|
108
113
|
# For Nijivoice TTS
|
package/lib/actions/captions.js
CHANGED
|
@@ -61,7 +61,7 @@ const graph_data = {
|
|
|
61
61
|
},
|
|
62
62
|
};
|
|
63
63
|
export const captions = async (context, callbacks) => {
|
|
64
|
-
if (context
|
|
64
|
+
if (MulmoStudioContextMethods.getCaption(context)) {
|
|
65
65
|
try {
|
|
66
66
|
MulmoStudioContextMethods.setSessionState(context, "caption", true);
|
|
67
67
|
const graph = new GraphAI(graph_data, { ...vanillaAgents });
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -8,8 +8,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
8
8
|
imageRefs: Record<string, string>;
|
|
9
9
|
}) => Promise<{
|
|
10
10
|
imageParams: {
|
|
11
|
-
model?: string | undefined;
|
|
12
11
|
style?: string | undefined;
|
|
12
|
+
model?: string | undefined;
|
|
13
13
|
moderation?: string | undefined;
|
|
14
14
|
images?: Record<string, {
|
|
15
15
|
type: "image";
|
|
@@ -32,16 +32,18 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
32
32
|
imagePath: string | undefined;
|
|
33
33
|
referenceImage: string | undefined;
|
|
34
34
|
htmlPrompt?: undefined;
|
|
35
|
+
htmlSystemPrompt?: undefined;
|
|
35
36
|
} | {
|
|
36
37
|
imagePath: string;
|
|
37
38
|
htmlPrompt: string;
|
|
39
|
+
htmlSystemPrompt: string[];
|
|
38
40
|
} | {
|
|
39
41
|
imagePath: string;
|
|
40
42
|
images: string[];
|
|
41
43
|
imageFromMovie: boolean;
|
|
42
44
|
imageParams: {
|
|
43
|
-
model?: string | undefined;
|
|
44
45
|
style?: string | undefined;
|
|
46
|
+
model?: string | undefined;
|
|
45
47
|
moderation?: string | undefined;
|
|
46
48
|
images?: Record<string, {
|
|
47
49
|
type: "image";
|
|
@@ -62,11 +64,12 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
62
64
|
};
|
|
63
65
|
movieFile: string | undefined;
|
|
64
66
|
htmlPrompt?: undefined;
|
|
67
|
+
htmlSystemPrompt?: undefined;
|
|
65
68
|
} | {
|
|
66
69
|
images: string[];
|
|
67
70
|
imageParams: {
|
|
68
|
-
model?: string | undefined;
|
|
69
71
|
style?: string | undefined;
|
|
72
|
+
model?: string | undefined;
|
|
70
73
|
moderation?: string | undefined;
|
|
71
74
|
images?: Record<string, {
|
|
72
75
|
type: "image";
|
|
@@ -90,6 +93,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
90
93
|
referenceImage: string;
|
|
91
94
|
prompt: string;
|
|
92
95
|
htmlPrompt?: undefined;
|
|
96
|
+
htmlSystemPrompt?: undefined;
|
|
93
97
|
}>;
|
|
94
98
|
export declare const imagePluginAgent: (namedInputs: {
|
|
95
99
|
context: MulmoStudioContext;
|
package/lib/actions/images.js
CHANGED
|
@@ -4,10 +4,11 @@ import { GraphAI, GraphAILogger } from "graphai";
|
|
|
4
4
|
import { TaskManager } from "graphai/lib/task_manager.js";
|
|
5
5
|
import * as agents from "@graphai/vanilla";
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
|
+
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
7
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
8
9
|
import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
|
|
9
10
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
10
|
-
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
|
|
11
|
+
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
11
12
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
12
13
|
import { findImagePlugin } from "../utils/image_plugins/index.js";
|
|
13
14
|
import { imagePrompt } from "../utils/prompt.js";
|
|
@@ -15,7 +16,6 @@ import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
|
15
16
|
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
16
17
|
const vanillaAgents = agents.default ?? agents;
|
|
17
18
|
dotenv.config();
|
|
18
|
-
// const openai = new OpenAI();
|
|
19
19
|
import { GoogleAuth } from "google-auth-library";
|
|
20
20
|
import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
|
|
21
21
|
const htmlStyle = (context, beat) => {
|
|
@@ -43,7 +43,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
43
43
|
}
|
|
44
44
|
if (beat.htmlPrompt) {
|
|
45
45
|
const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
|
|
46
|
-
|
|
46
|
+
const htmlSystemPrompt = [
|
|
47
|
+
"Based on the provided information, create a single slide HTML page using Tailwind CSS.",
|
|
48
|
+
`The view port size is ${context.presentationStyle.canvasSize.width}x${context.presentationStyle.canvasSize.height}. Make sure the HTML fits within the view port.`,
|
|
49
|
+
"If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
|
|
50
|
+
"Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
|
|
51
|
+
"Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
|
|
52
|
+
"If data is provided, use it effectively to populate the slide.",
|
|
53
|
+
];
|
|
54
|
+
return { imagePath, htmlPrompt, htmlSystemPrompt };
|
|
47
55
|
}
|
|
48
56
|
// images for "edit_image"
|
|
49
57
|
const images = (() => {
|
|
@@ -85,6 +93,7 @@ const beat_graph_data = {
|
|
|
85
93
|
nodes: {
|
|
86
94
|
context: {},
|
|
87
95
|
imageAgentInfo: {},
|
|
96
|
+
htmlImageAgentInfo: {},
|
|
88
97
|
movieAgentInfo: {},
|
|
89
98
|
imageRefs: {},
|
|
90
99
|
beat: {},
|
|
@@ -113,25 +122,21 @@ const beat_graph_data = {
|
|
|
113
122
|
htmlImageAgent: {
|
|
114
123
|
if: ":preprocessor.htmlPrompt",
|
|
115
124
|
defaultValue: {},
|
|
116
|
-
agent: "
|
|
125
|
+
agent: ":htmlImageAgentInfo.agent",
|
|
126
|
+
params: {
|
|
127
|
+
mode: ":htmlImageAgentInfo.model",
|
|
128
|
+
},
|
|
117
129
|
inputs: {
|
|
118
130
|
prompt: ":preprocessor.htmlPrompt",
|
|
119
|
-
system:
|
|
120
|
-
"Based on the provided information, create a single slide HTML page using Tailwind CSS.",
|
|
121
|
-
"If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
|
|
122
|
-
"Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
|
|
123
|
-
"Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
|
|
124
|
-
"If data is provided, use it effectively to populate the slide.",
|
|
125
|
-
],
|
|
131
|
+
system: ":preprocessor.htmlSystemPrompt",
|
|
126
132
|
},
|
|
127
133
|
},
|
|
128
134
|
htmlImageGenerator: {
|
|
129
135
|
if: ":preprocessor.htmlPrompt",
|
|
130
136
|
defaultValue: {},
|
|
131
137
|
agent: htmlImageGeneratorAgent,
|
|
132
|
-
// console: { before: true, after: true },
|
|
133
138
|
inputs: {
|
|
134
|
-
html: ":htmlImageAgent.text.
|
|
139
|
+
html: ":htmlImageAgent.text.codeBlockOrRaw()",
|
|
135
140
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
136
141
|
file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
|
|
137
142
|
mulmoContext: ":context", // for fileCacheAgentFilter
|
|
@@ -213,6 +218,7 @@ const graph_data = {
|
|
|
213
218
|
nodes: {
|
|
214
219
|
context: {},
|
|
215
220
|
imageAgentInfo: {},
|
|
221
|
+
htmlImageAgentInfo: {},
|
|
216
222
|
movieAgentInfo: {},
|
|
217
223
|
outputStudioFilePath: {},
|
|
218
224
|
imageRefs: {},
|
|
@@ -222,6 +228,7 @@ const graph_data = {
|
|
|
222
228
|
rows: ":context.studio.script.beats",
|
|
223
229
|
context: ":context",
|
|
224
230
|
imageAgentInfo: ":imageAgentInfo",
|
|
231
|
+
htmlImageAgentInfo: ":htmlImageAgentInfo",
|
|
225
232
|
movieAgentInfo: ":movieAgentInfo",
|
|
226
233
|
imageRefs: ":imageRefs",
|
|
227
234
|
},
|
|
@@ -268,7 +275,6 @@ const graph_data = {
|
|
|
268
275
|
},
|
|
269
276
|
},
|
|
270
277
|
writeOutput: {
|
|
271
|
-
// console: { before: true },
|
|
272
278
|
agent: "fileWriteAgent",
|
|
273
279
|
inputs: {
|
|
274
280
|
file: ":outputStudioFilePath",
|
|
@@ -370,13 +376,28 @@ const prepareGenerateImages = async (context) => {
|
|
|
370
376
|
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
371
377
|
mkdir(imageProjectDirPath);
|
|
372
378
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
|
|
379
|
+
const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
|
|
373
380
|
const imageRefs = await getImageRefs(context);
|
|
381
|
+
// Determine movie agent based on provider
|
|
382
|
+
const getMovieAgent = () => {
|
|
383
|
+
if (context.dryRun)
|
|
384
|
+
return "mediaMockAgent";
|
|
385
|
+
const provider = context.presentationStyle.movieParams?.provider ?? "google";
|
|
386
|
+
switch (provider) {
|
|
387
|
+
case "replicate":
|
|
388
|
+
return "movieReplicateAgent";
|
|
389
|
+
case "google":
|
|
390
|
+
default:
|
|
391
|
+
return "movieGoogleAgent";
|
|
392
|
+
}
|
|
393
|
+
};
|
|
374
394
|
GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
|
|
375
395
|
const injections = {
|
|
376
396
|
context,
|
|
377
397
|
imageAgentInfo,
|
|
398
|
+
htmlImageAgentInfo,
|
|
378
399
|
movieAgentInfo: {
|
|
379
|
-
agent:
|
|
400
|
+
agent: getMovieAgent(),
|
|
380
401
|
},
|
|
381
402
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
|
|
382
403
|
imageRefs,
|
|
@@ -384,6 +405,9 @@ const prepareGenerateImages = async (context) => {
|
|
|
384
405
|
return injections;
|
|
385
406
|
};
|
|
386
407
|
const getConcurrency = (context) => {
|
|
408
|
+
if (context.presentationStyle.movieParams?.provider === "replicate") {
|
|
409
|
+
return 4;
|
|
410
|
+
}
|
|
387
411
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
388
412
|
if (imageAgentInfo.provider === "openai") {
|
|
389
413
|
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
@@ -396,7 +420,17 @@ const getConcurrency = (context) => {
|
|
|
396
420
|
const generateImages = async (context, callbacks) => {
|
|
397
421
|
const options = await graphOption(context);
|
|
398
422
|
const injections = await prepareGenerateImages(context);
|
|
399
|
-
const graph = new GraphAI(graph_data, {
|
|
423
|
+
const graph = new GraphAI(graph_data, {
|
|
424
|
+
...vanillaAgents,
|
|
425
|
+
imageGoogleAgent,
|
|
426
|
+
movieGoogleAgent,
|
|
427
|
+
movieReplicateAgent,
|
|
428
|
+
imageOpenaiAgent,
|
|
429
|
+
mediaMockAgent,
|
|
430
|
+
fileWriteAgent,
|
|
431
|
+
openAIAgent,
|
|
432
|
+
anthropicAgent,
|
|
433
|
+
}, options);
|
|
400
434
|
Object.keys(injections).forEach((key) => {
|
|
401
435
|
graph.injectValue(key, injections[key]);
|
|
402
436
|
});
|
|
@@ -423,7 +457,17 @@ export const images = async (context, callbacks) => {
|
|
|
423
457
|
export const generateBeatImage = async (index, context, callbacks) => {
|
|
424
458
|
const options = await graphOption(context);
|
|
425
459
|
const injections = await prepareGenerateImages(context);
|
|
426
|
-
const graph = new GraphAI(beat_graph_data, {
|
|
460
|
+
const graph = new GraphAI(beat_graph_data, {
|
|
461
|
+
...vanillaAgents,
|
|
462
|
+
imageGoogleAgent,
|
|
463
|
+
movieGoogleAgent,
|
|
464
|
+
movieReplicateAgent,
|
|
465
|
+
imageOpenaiAgent,
|
|
466
|
+
mediaMockAgent,
|
|
467
|
+
fileWriteAgent,
|
|
468
|
+
openAIAgent,
|
|
469
|
+
anthropicAgent,
|
|
470
|
+
}, options);
|
|
427
471
|
Object.keys(injections).forEach((key) => {
|
|
428
472
|
if ("outputStudioFilePath" !== key) {
|
|
429
473
|
graph.injectValue(key, injections[key]);
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
|
|
2
|
-
export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
|
|
1
|
+
import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
|
|
2
|
+
export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption) => {
|
|
3
3
|
videoId: string;
|
|
4
4
|
videoPart: string;
|
|
5
5
|
};
|
package/lib/actions/movie.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { GraphAILogger, assert } from "graphai";
|
|
2
|
-
import { mulmoTransitionSchema } from "../types/index.js";
|
|
2
|
+
import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
|
|
3
3
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
4
4
|
import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
|
|
5
5
|
import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
|
|
6
6
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
7
7
|
// const isMac = process.platform === "darwin";
|
|
8
8
|
const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
|
|
9
|
-
export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
|
|
9
|
+
export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption) => {
|
|
10
10
|
const videoId = `v${inputIndex}`;
|
|
11
11
|
const videoFilters = [];
|
|
12
12
|
// Handle different media types
|
|
@@ -19,9 +19,19 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
|
|
|
19
19
|
videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
|
|
20
20
|
}
|
|
21
21
|
// Common filters for all media types
|
|
22
|
-
videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS"
|
|
23
|
-
//
|
|
24
|
-
|
|
22
|
+
videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS");
|
|
23
|
+
// Apply scaling based on fill option
|
|
24
|
+
if (fillOption.style === "aspectFill") {
|
|
25
|
+
// For aspect fill: scale to fill the canvas completely, cropping if necessary
|
|
26
|
+
videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`, `crop=${canvasInfo.width}:${canvasInfo.height}`);
|
|
27
|
+
}
|
|
28
|
+
else {
|
|
29
|
+
// For aspect fit: scale to fit within canvas, padding if necessary
|
|
30
|
+
videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
|
|
31
|
+
// In case of the aspect ratio mismatch, we fill the extra space with black color.
|
|
32
|
+
`pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
|
|
33
|
+
}
|
|
34
|
+
videoFilters.push("setsar=1", "format=yuv420p");
|
|
25
35
|
return {
|
|
26
36
|
videoId,
|
|
27
37
|
videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
|
|
@@ -95,7 +105,12 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, capt
|
|
|
95
105
|
return 0;
|
|
96
106
|
})();
|
|
97
107
|
const duration = studioBeat.duration + extraPadding;
|
|
98
|
-
|
|
108
|
+
// Get fillOption from merged imageParams (global + beat-specific)
|
|
109
|
+
const globalFillOption = context.presentationStyle.movieParams?.fillOption;
|
|
110
|
+
const beatFillOption = beat.movieParams?.fillOption;
|
|
111
|
+
const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
|
|
112
|
+
const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
|
|
113
|
+
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption);
|
|
99
114
|
ffmpegContext.filterComplex.push(videoPart);
|
|
100
115
|
if (caption && studioBeat.captionFile) {
|
|
101
116
|
const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { assert } from "graphai";
|
|
1
|
+
import { assert, GraphAILogger } from "graphai";
|
|
2
2
|
import { silent60secPath } from "../utils/file.js";
|
|
3
3
|
import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
|
|
4
4
|
const getMovieDulation = async (beat) => {
|
|
@@ -77,7 +77,8 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
|
|
|
77
77
|
const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
|
|
78
78
|
// Yes, the current beat has spilled over audio.
|
|
79
79
|
const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
|
|
80
|
-
if (beatsTotalDuration > audioDuration) {
|
|
80
|
+
if (beatsTotalDuration > audioDuration + 0.01) {
|
|
81
|
+
// 0.01 is a tolerance to avoid floating point precision issues
|
|
81
82
|
group.reduce((remaining, idx, iGroup) => {
|
|
82
83
|
if (remaining >= groupBeatsDurations[iGroup]) {
|
|
83
84
|
return remaining - groupBeatsDurations[iGroup];
|
|
@@ -88,7 +89,9 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
|
|
|
88
89
|
}
|
|
89
90
|
else {
|
|
90
91
|
// Last beat gets the rest of the audio.
|
|
91
|
-
|
|
92
|
+
if (audioDuration > beatsTotalDuration) {
|
|
93
|
+
groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
|
|
94
|
+
}
|
|
92
95
|
}
|
|
93
96
|
beatDurations.push(...groupBeatsDurations);
|
|
94
97
|
}
|
|
@@ -98,7 +101,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
|
|
|
98
101
|
// padding is the amount of audio padding specified in the script.
|
|
99
102
|
const padding = getPadding(context, beat, index);
|
|
100
103
|
// totalPadding is the amount of audio padding to be added to the audio file.
|
|
101
|
-
const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
|
|
104
|
+
const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
|
|
102
105
|
const beatDuration = audioDuration + totalPadding;
|
|
103
106
|
beatDurations.push(beatDuration);
|
|
104
107
|
if (totalPadding > 0) {
|
|
@@ -124,7 +127,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
|
|
|
124
127
|
// We cannot reuse longSilentId. We need to explicitly split it for each beat.
|
|
125
128
|
const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
|
|
126
129
|
if (silentIds.length > 0) {
|
|
127
|
-
const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
|
|
130
|
+
const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
|
|
128
131
|
ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
|
|
129
132
|
}
|
|
130
133
|
const inputIds = [];
|
|
@@ -142,6 +145,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
|
|
|
142
145
|
}
|
|
143
146
|
});
|
|
144
147
|
assert(silentIds.length === 0, "silentIds.length !== 0");
|
|
148
|
+
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
|
145
149
|
// Finally, combine all audio files.
|
|
146
150
|
ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
|
|
147
151
|
await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
|
|
|
4
4
|
import imageOpenaiAgent from "./image_openai_agent.js";
|
|
5
5
|
import tavilySearchAgent from "./tavily_agent.js";
|
|
6
6
|
import movieGoogleAgent from "./movie_google_agent.js";
|
|
7
|
+
import movieReplicateAgent from "./movie_replicate_agent.js";
|
|
7
8
|
import mediaMockAgent from "./media_mock_agent.js";
|
|
8
9
|
import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
9
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
@@ -13,4 +14,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
|
|
|
13
14
|
import { textInputAgent } from "@graphai/input_agents";
|
|
14
15
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
15
16
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
16
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
17
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
|
|
|
4
4
|
import imageOpenaiAgent from "./image_openai_agent.js";
|
|
5
5
|
import tavilySearchAgent from "./tavily_agent.js";
|
|
6
6
|
import movieGoogleAgent from "./movie_google_agent.js";
|
|
7
|
+
import movieReplicateAgent from "./movie_replicate_agent.js";
|
|
7
8
|
import mediaMockAgent from "./media_mock_agent.js";
|
|
8
9
|
import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
9
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
@@ -14,4 +15,4 @@ import { textInputAgent } from "@graphai/input_agents";
|
|
|
14
15
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
15
16
|
// import * as vanilla from "@graphai/vanilla";
|
|
16
17
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
17
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
18
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
export declare const getAspectRatio: (canvasSize: {
|
|
3
|
+
width: number;
|
|
4
|
+
height: number;
|
|
5
|
+
}) => string;
|
|
6
|
+
export type MovieReplicateConfig = {
|
|
7
|
+
apiKey?: string;
|
|
8
|
+
};
|
|
9
|
+
export declare const movieReplicateAgent: AgentFunction<{
|
|
10
|
+
model: `${string}/${string}` | undefined;
|
|
11
|
+
canvasSize: {
|
|
12
|
+
width: number;
|
|
13
|
+
height: number;
|
|
14
|
+
};
|
|
15
|
+
duration?: number;
|
|
16
|
+
}, {
|
|
17
|
+
buffer: Buffer;
|
|
18
|
+
}, {
|
|
19
|
+
prompt: string;
|
|
20
|
+
imagePath?: string;
|
|
21
|
+
}, MovieReplicateConfig>;
|
|
22
|
+
declare const movieReplicateAgentInfo: AgentFunctionInfo;
|
|
23
|
+
export default movieReplicateAgentInfo;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { GraphAILogger } from "graphai";
|
|
3
|
+
import Replicate from "replicate";
|
|
4
|
+
async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
|
|
5
|
+
const replicate = new Replicate({
|
|
6
|
+
auth: apiKey,
|
|
7
|
+
});
|
|
8
|
+
const input = {
|
|
9
|
+
prompt: prompt,
|
|
10
|
+
duration: duration,
|
|
11
|
+
image: undefined,
|
|
12
|
+
start_image: undefined,
|
|
13
|
+
aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
|
|
14
|
+
// resolution: "720p", // only for bytedance/seedance-1-lite
|
|
15
|
+
// fps: 24, // only for bytedance/seedance-1-lite
|
|
16
|
+
// camera_fixed: false, // only for bytedance/seedance-1-lite
|
|
17
|
+
// mode: "standard" // only for kwaivgi/kling-v2.1
|
|
18
|
+
// negative_prompt: "" // only for kwaivgi/kling-v2.1
|
|
19
|
+
};
|
|
20
|
+
// Add image if provided (for image-to-video generation)
|
|
21
|
+
if (imagePath) {
|
|
22
|
+
const buffer = readFileSync(imagePath);
|
|
23
|
+
const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
|
|
24
|
+
if (model === "kwaivgi/kling-v2.1") {
|
|
25
|
+
input.start_image = base64Image;
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
input.image = base64Image;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
try {
|
|
32
|
+
const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
|
|
33
|
+
// Download the generated video
|
|
34
|
+
if (output && typeof output === "object" && "url" in output) {
|
|
35
|
+
const videoUrl = output.url();
|
|
36
|
+
const videoResponse = await fetch(videoUrl);
|
|
37
|
+
if (!videoResponse.ok) {
|
|
38
|
+
throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
|
|
39
|
+
}
|
|
40
|
+
const arrayBuffer = await videoResponse.arrayBuffer();
|
|
41
|
+
return Buffer.from(arrayBuffer);
|
|
42
|
+
}
|
|
43
|
+
return undefined;
|
|
44
|
+
}
|
|
45
|
+
catch (error) {
|
|
46
|
+
GraphAILogger.info("Replicate generation error:", error);
|
|
47
|
+
throw error;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
export const getAspectRatio = (canvasSize) => {
|
|
51
|
+
if (canvasSize.width > canvasSize.height) {
|
|
52
|
+
return "16:9";
|
|
53
|
+
}
|
|
54
|
+
else if (canvasSize.width < canvasSize.height) {
|
|
55
|
+
return "9:16";
|
|
56
|
+
}
|
|
57
|
+
else {
|
|
58
|
+
return "1:1";
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
export const movieReplicateAgent = async ({ namedInputs, params, config }) => {
|
|
62
|
+
const { prompt, imagePath } = namedInputs;
|
|
63
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
64
|
+
const duration = params.duration ?? 5;
|
|
65
|
+
const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
|
|
66
|
+
if (!apiKey) {
|
|
67
|
+
throw new Error("REPLICATE_API_TOKEN environment variable is required");
|
|
68
|
+
}
|
|
69
|
+
try {
|
|
70
|
+
const buffer = await generateMovie(params.model, apiKey, prompt, imagePath, aspectRatio, duration);
|
|
71
|
+
if (buffer) {
|
|
72
|
+
return { buffer };
|
|
73
|
+
}
|
|
74
|
+
throw new Error("ERROR: generateMovie returned undefined");
|
|
75
|
+
}
|
|
76
|
+
catch (error) {
|
|
77
|
+
GraphAILogger.info("Failed to generate movie:", error.message);
|
|
78
|
+
throw error;
|
|
79
|
+
}
|
|
80
|
+
};
|
|
81
|
+
const movieReplicateAgentInfo = {
|
|
82
|
+
name: "movieReplicateAgent",
|
|
83
|
+
agent: movieReplicateAgent,
|
|
84
|
+
mock: movieReplicateAgent,
|
|
85
|
+
samples: [],
|
|
86
|
+
description: "Replicate Movie agent using seedance-1-lite",
|
|
87
|
+
category: ["movie"],
|
|
88
|
+
author: "Receptron Team",
|
|
89
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
90
|
+
license: "MIT",
|
|
91
|
+
environmentVariables: ["REPLICATE_API_TOKEN"],
|
|
92
|
+
};
|
|
93
|
+
export default movieReplicateAgentInfo;
|
|
@@ -5,6 +5,8 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
5
5
|
b: string | undefined;
|
|
6
6
|
} & {
|
|
7
7
|
u: string[] | never[];
|
|
8
|
+
} & {
|
|
9
|
+
"input-file": string | undefined;
|
|
8
10
|
} & {
|
|
9
11
|
i: boolean | undefined;
|
|
10
12
|
} & {
|
|
@@ -14,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
14
16
|
} & {
|
|
15
17
|
s: string;
|
|
16
18
|
} & {
|
|
17
|
-
llm: "
|
|
19
|
+
llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
|
|
18
20
|
} & {
|
|
19
21
|
llm_model: string | undefined;
|
|
20
22
|
}>;
|
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
import { getBaseDirPath, getFullPath } from "../../../../utils/file.js";
|
|
2
2
|
import { outDirName, cacheDirName } from "../../../../utils/const.js";
|
|
3
3
|
import { getUrlsIfNeeded, selectTemplate } from "../../../../utils/inquirer.js";
|
|
4
|
-
import { createMulmoScriptFromUrl } from "../../../../tools/create_mulmo_script_from_url.js";
|
|
4
|
+
import { createMulmoScriptFromUrl, createMulmoScriptFromFile } from "../../../../tools/create_mulmo_script_from_url.js";
|
|
5
5
|
import { createMulmoScriptInteractively } from "../../../../tools/create_mulmo_script_interactively.js";
|
|
6
6
|
import { setGraphAILogger } from "../../../../cli/helpers.js";
|
|
7
7
|
export const handler = async (argv) => {
|
|
8
|
-
const { o: outdir, b: basedir, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
|
|
8
|
+
const { o: outdir, b: basedir, "input-file": inputFile, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
|
|
9
9
|
let { t: template } = argv;
|
|
10
10
|
const urls = argv.u || [];
|
|
11
11
|
const baseDirPath = getBaseDirPath(basedir);
|
|
12
12
|
const outDirPath = getFullPath(baseDirPath, outdir ?? outDirName);
|
|
13
13
|
const cacheDirPath = getFullPath(outDirPath, cache ?? cacheDirName);
|
|
14
14
|
if (!template) {
|
|
15
|
-
|
|
15
|
+
if (interactive) {
|
|
16
|
+
template = await selectTemplate();
|
|
17
|
+
}
|
|
18
|
+
else {
|
|
19
|
+
template = "business";
|
|
20
|
+
}
|
|
16
21
|
}
|
|
17
22
|
setGraphAILogger(verbose, {
|
|
18
23
|
baseDirPath,
|
|
@@ -22,13 +27,17 @@ export const handler = async (argv) => {
|
|
|
22
27
|
urls,
|
|
23
28
|
interactive,
|
|
24
29
|
filename,
|
|
30
|
+
inputFile,
|
|
25
31
|
llm,
|
|
26
32
|
llm_model,
|
|
27
33
|
});
|
|
28
|
-
const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm };
|
|
34
|
+
const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm, verbose };
|
|
29
35
|
if (interactive) {
|
|
30
36
|
await createMulmoScriptInteractively(context);
|
|
31
37
|
}
|
|
38
|
+
if (inputFile) {
|
|
39
|
+
await createMulmoScriptFromFile(inputFile, context);
|
|
40
|
+
}
|
|
32
41
|
else {
|
|
33
42
|
context.urls = await getUrlsIfNeeded(urls);
|
|
34
43
|
await createMulmoScriptFromUrl(context);
|
|
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
10
10
|
} & {
|
|
11
11
|
beats_per_scene: number;
|
|
12
12
|
} & {
|
|
13
|
-
llm: "
|
|
13
|
+
llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
|
|
14
14
|
} & {
|
|
15
15
|
llm_model: string | undefined;
|
|
16
16
|
} & {
|
package/lib/cli/helpers.js
CHANGED
|
@@ -7,7 +7,7 @@ import { isHttp } from "../utils/utils.js";
|
|
|
7
7
|
import { createOrUpdateStudioData } from "../utils/preprocess.js";
|
|
8
8
|
import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
|
|
9
9
|
import { translate } from "../actions/translate.js";
|
|
10
|
-
import { mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/schema.js";
|
|
10
|
+
import { mulmoCaptionParamsSchema, mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/schema.js";
|
|
11
11
|
export const setGraphAILogger = (verbose, logValues) => {
|
|
12
12
|
if (verbose) {
|
|
13
13
|
if (logValues) {
|
|
@@ -126,13 +126,18 @@ export const initializeContext = async (argv) => {
|
|
|
126
126
|
// validate mulmoStudioSchema. skip if __test_invalid__ is true
|
|
127
127
|
const studio = createOrUpdateStudioData(mulmoScript, currentStudio?.mulmoData, fileName);
|
|
128
128
|
const multiLingual = getMultiLingual(outputMultilingualFilePath, studio.beats.length);
|
|
129
|
+
if (argv.c) {
|
|
130
|
+
studio.script.captionParams = mulmoCaptionParamsSchema.parse({
|
|
131
|
+
...(studio.script.captionParams ?? {}),
|
|
132
|
+
lang: argv.c,
|
|
133
|
+
});
|
|
134
|
+
}
|
|
129
135
|
return {
|
|
130
136
|
studio,
|
|
131
137
|
fileDirs: files,
|
|
132
138
|
force: Boolean(argv.f),
|
|
133
139
|
dryRun: Boolean(argv.dryRun),
|
|
134
140
|
lang: argv.l,
|
|
135
|
-
caption: argv.c,
|
|
136
141
|
sessionState: {
|
|
137
142
|
inSession: {
|
|
138
143
|
audio: false,
|
|
@@ -160,7 +165,7 @@ export const initializeContext = async (argv) => {
|
|
|
160
165
|
}
|
|
161
166
|
};
|
|
162
167
|
export const runTranslateIfNeeded = async (context, argv) => {
|
|
163
|
-
if (argv.l ||
|
|
168
|
+
if (argv.l || context.studio.script.captionParams?.lang) {
|
|
164
169
|
GraphAILogger.log("run translate");
|
|
165
170
|
await translate(context);
|
|
166
171
|
}
|