mulmocast 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +5 -0
  2. package/lib/actions/captions.js +1 -1
  3. package/lib/actions/images.d.ts +13 -3
  4. package/lib/actions/images.js +91 -9
  5. package/lib/actions/movie.d.ts +2 -2
  6. package/lib/actions/movie.js +21 -6
  7. package/lib/agents/add_bgm_agent.js +1 -1
  8. package/lib/agents/combine_audio_files_agent.js +9 -5
  9. package/lib/agents/index.d.ts +2 -1
  10. package/lib/agents/index.js +2 -1
  11. package/lib/agents/movie_replicate_agent.d.ts +23 -0
  12. package/lib/agents/movie_replicate_agent.js +93 -0
  13. package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
  14. package/lib/cli/commands/tool/scripting/builder.js +5 -0
  15. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
  16. package/lib/cli/commands/tool/scripting/handler.js +13 -4
  17. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  18. package/lib/cli/helpers.js +8 -3
  19. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  20. package/lib/methods/mulmo_presentation_style.js +21 -2
  21. package/lib/methods/mulmo_studio_context.js +1 -1
  22. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
  23. package/lib/tools/create_mulmo_script_from_url.js +129 -43
  24. package/lib/types/schema.d.ts +1261 -165
  25. package/lib/types/schema.js +47 -1
  26. package/lib/types/type.d.ts +9 -2
  27. package/lib/utils/ffmpeg_utils.d.ts +2 -2
  28. package/lib/utils/ffmpeg_utils.js +9 -4
  29. package/lib/utils/preprocess.d.ts +47 -6
  30. package/lib/utils/utils.d.ts +1 -0
  31. package/lib/utils/utils.js +5 -0
  32. package/package.json +3 -2
  33. package/scripts/templates/presentation.json +123 -0
  34. package/scripts/templates/presentation.json~ +119 -0
package/README.md CHANGED
@@ -103,6 +103,11 @@ GOOGLE_PROJECT_ID=your_google_project_id
 
 See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
+#### (Optional) For Movie models
+```bash
+REPLICATE_API_TOKEN=your_replicate_api_key
+```
+
 #### (Optional) For TTS models
 ```bash
 # For Nijivoice TTS
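Note: the new Replicate movie agent (added later in this diff) resolves the token from an injected config first and falls back to this environment variable:

const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
if (!apiKey) {
    throw new Error("REPLICATE_API_TOKEN environment variable is required");
}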
package/lib/actions/captions.js CHANGED
@@ -61,7 +61,7 @@ const graph_data = {
     },
 };
 export const captions = async (context, callbacks) => {
-    if (context.caption) {
+    if (MulmoStudioContextMethods.getCaption(context)) {
         try {
             MulmoStudioContextMethods.setSessionState(context, "caption", true);
             const graph = new GraphAI(graph_data, { ...vanillaAgents });
package/lib/actions/images.d.ts CHANGED
@@ -8,8 +8,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
     imageRefs: Record<string, string>;
 }) => Promise<{
     imageParams: {
-        model?: string | undefined;
         style?: string | undefined;
+        model?: string | undefined;
         moderation?: string | undefined;
         images?: Record<string, {
             type: "image";
@@ -31,13 +31,19 @@ export declare const imagePreprocessAgent: (namedInputs: {
     movieFile: string | undefined;
     imagePath: string | undefined;
     referenceImage: string | undefined;
+    htmlPrompt?: undefined;
+    htmlSystemPrompt?: undefined;
+} | {
+    imagePath: string;
+    htmlPrompt: string;
+    htmlSystemPrompt: string[];
 } | {
     imagePath: string;
     images: string[];
     imageFromMovie: boolean;
     imageParams: {
-        model?: string | undefined;
         style?: string | undefined;
+        model?: string | undefined;
         moderation?: string | undefined;
         images?: Record<string, {
             type: "image";
@@ -57,11 +63,13 @@ export declare const imagePreprocessAgent: (namedInputs: {
     }> | undefined;
     };
     movieFile: string | undefined;
+    htmlPrompt?: undefined;
+    htmlSystemPrompt?: undefined;
 } | {
     images: string[];
     imageParams: {
-        model?: string | undefined;
         style?: string | undefined;
+        model?: string | undefined;
         moderation?: string | undefined;
         images?: Record<string, {
             type: "image";
@@ -84,6 +92,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
     imagePath: string;
     referenceImage: string;
     prompt: string;
+    htmlPrompt?: undefined;
+    htmlSystemPrompt?: undefined;
 }>;
 export declare const imagePluginAgent: (namedInputs: {
     context: MulmoStudioContext;
package/lib/actions/images.js CHANGED
@@ -3,17 +3,19 @@ import fs from "fs";
 import { GraphAI, GraphAILogger } from "graphai";
 import { TaskManager } from "graphai/lib/task_manager.js";
 import * as agents from "@graphai/vanilla";
+import { openAIAgent } from "@graphai/openai_agent";
+import { anthropicAgent } from "@graphai/anthropic_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
+import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { findImagePlugin } from "../utils/image_plugins/index.js";
 import { imagePrompt } from "../utils/prompt.js";
 import { defaultOpenAIImageModel } from "../utils/const.js";
+import { renderHTMLToImage } from "../utils/markdown.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
-// const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
 import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
 const htmlStyle = (context, beat) => {
@@ -39,6 +41,18 @@ export const imagePreprocessAgent = async (namedInputs) => {
         // undefined prompt indicates that image generation is not needed
         return { imagePath: path, referenceImage: path, ...returnValue };
     }
+    if (beat.htmlPrompt) {
+        const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
+        const htmlSystemPrompt = [
+            "Based on the provided information, create a single slide HTML page using Tailwind CSS.",
+            `The view port size is ${context.presentationStyle.canvasSize.width}x${context.presentationStyle.canvasSize.height}. Make sure the HTML fits within the view port.`,
+            "If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
+            "Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
+            "Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
+            "If data is provided, use it effectively to populate the slide.",
+        ];
+        return { imagePath, htmlPrompt, htmlSystemPrompt };
+    }
     // images for "edit_image"
     const images = (() => {
         const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
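Note: a hypothetical beat illustrating the htmlPrompt shape this branch consumes; the agent concatenates prompt with a pretty-printed data payload before handing it to the LLM:

const beat = {
    htmlPrompt: {
        prompt: "Summarize quarterly revenue as a bar chart",
        data: { q1: 120, q2: 150 },
    },
};
// composed prompt passed on to the htmlImageAgent node:
// "Summarize quarterly revenue as a bar chart\n\n data\n{\n  \"q1\": 120,\n  \"q2\": 150\n}"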
@@ -69,12 +83,17 @@ export const imagePluginAgent = async (namedInputs) => {
         throw error;
     }
 };
+const htmlImageGeneratorAgent = async (namedInputs) => {
+    const { html, file, canvasSize } = namedInputs;
+    await renderHTMLToImage(html, file, canvasSize.width, canvasSize.height);
+};
 const beat_graph_data = {
     version: 0.5,
     concurrency: 4,
     nodes: {
         context: {},
         imageAgentInfo: {},
+        htmlImageAgentInfo: {},
         movieAgentInfo: {},
         imageRefs: {},
         beat: {},
@@ -100,6 +119,31 @@ const beat_graph_data = {
                 onComplete: ":preprocessor",
             },
         },
+        htmlImageAgent: {
+            if: ":preprocessor.htmlPrompt",
+            defaultValue: {},
+            agent: ":htmlImageAgentInfo.agent",
+            params: {
+                mode: ":htmlImageAgentInfo.model",
+            },
+            inputs: {
+                prompt: ":preprocessor.htmlPrompt",
+                system: ":preprocessor.htmlSystemPrompt",
+            },
+        },
+        htmlImageGenerator: {
+            if: ":preprocessor.htmlPrompt",
+            defaultValue: {},
+            agent: htmlImageGeneratorAgent,
+            inputs: {
+                html: ":htmlImageAgent.text.codeBlockOrRaw()",
+                canvasSize: ":context.presentationStyle.canvasSize",
+                file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
+                mulmoContext: ":context", // for fileCacheAgentFilter
+                index: ":__mapIndex", // for fileCacheAgentFilter
+                sessionType: "image", // for fileCacheAgentFilter
+            },
+        },
         imageGenerator: {
             if: ":preprocessor.prompt",
             agent: ":imageAgentInfo.agent",
@@ -108,7 +152,6 @@ const beat_graph_data = {
                 prompt: ":preprocessor.prompt",
                 images: ":preprocessor.images",
                 file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
-                text: ":preprocessor.prompt", // only for fileCacheAgentFilter
                 force: ":context.force", // only for fileCacheAgentFilter
                 mulmoContext: ":context", // for fileCacheAgentFilter
                 index: ":__mapIndex", // for fileCacheAgentFilter
@@ -157,7 +200,7 @@ const beat_graph_data = {
         output: {
             agent: "copyAgent",
             inputs: {
-                onComplete: ":imageFromMovie", // to wait for imageFromMovie to finish
+                onComplete: [":imageFromMovie", ":htmlImageGenerator"], // to wait for imageFromMovie to finish
                 imageFile: ":preprocessor.imagePath",
                 movieFile: ":preprocessor.movieFile",
             },
@@ -175,6 +218,7 @@ const graph_data = {
     nodes: {
         context: {},
         imageAgentInfo: {},
+        htmlImageAgentInfo: {},
         movieAgentInfo: {},
         outputStudioFilePath: {},
         imageRefs: {},
@@ -184,6 +228,7 @@ const graph_data = {
                 rows: ":context.studio.script.beats",
                 context: ":context",
                 imageAgentInfo: ":imageAgentInfo",
+                htmlImageAgentInfo: ":htmlImageAgentInfo",
                 movieAgentInfo: ":movieAgentInfo",
                 imageRefs: ":imageRefs",
             },
@@ -230,7 +275,6 @@ const graph_data = {
            },
        },
        writeOutput: {
-            // console: { before: true },
            agent: "fileWriteAgent",
            inputs: {
                file: ":outputStudioFilePath",
@@ -258,7 +302,7 @@ const graphOption = async (context) => {
         {
             name: "fileCacheAgentFilter",
             agent: fileCacheAgentFilter,
-            nodeIds: ["imageGenerator", "movieGenerator"],
+            nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator"],
         },
     ];
     const taskManager = new TaskManager(getConcurrency(context));
@@ -332,13 +376,28 @@ const prepareGenerateImages = async (context) => {
     const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
     mkdir(imageProjectDirPath);
     const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
+    const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
     const imageRefs = await getImageRefs(context);
+    // Determine movie agent based on provider
+    const getMovieAgent = () => {
+        if (context.dryRun)
+            return "mediaMockAgent";
+        const provider = context.presentationStyle.movieParams?.provider ?? "google";
+        switch (provider) {
+            case "replicate":
+                return "movieReplicateAgent";
+            case "google":
+            default:
+                return "movieGoogleAgent";
+        }
+    };
     GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
     const injections = {
         context,
         imageAgentInfo,
+        htmlImageAgentInfo,
         movieAgentInfo: {
-            agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
+            agent: getMovieAgent(),
         },
         outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
         imageRefs,
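Note: with this switch, a presentation style fragment like the following (a sketch; field names are from the code above, and the model value is illustrative rather than a documented default) routes beat movies to Replicate, while omitting provider keeps the previous movieGoogleAgent behavior:

const presentationStyle = {
    canvasSize: { width: 1280, height: 720 },
    movieParams: { provider: "replicate", model: "bytedance/seedance-1-lite" },
};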
@@ -346,6 +405,9 @@ const prepareGenerateImages = async (context) => {
     return injections;
 };
 const getConcurrency = (context) => {
+    if (context.presentationStyle.movieParams?.provider === "replicate") {
+        return 4;
+    }
     const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
     if (imageAgentInfo.provider === "openai") {
         // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -358,7 +420,17 @@ const getConcurrency = (context) => {
 const generateImages = async (context, callbacks) => {
     const options = await graphOption(context);
     const injections = await prepareGenerateImages(context);
-    const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
+    const graph = new GraphAI(graph_data, {
+        ...vanillaAgents,
+        imageGoogleAgent,
+        movieGoogleAgent,
+        movieReplicateAgent,
+        imageOpenaiAgent,
+        mediaMockAgent,
+        fileWriteAgent,
+        openAIAgent,
+        anthropicAgent,
+    }, options);
     Object.keys(injections).forEach((key) => {
         graph.injectValue(key, injections[key]);
     });
@@ -385,7 +457,17 @@ export const images = async (context, callbacks) => {
 export const generateBeatImage = async (index, context, callbacks) => {
     const options = await graphOption(context);
     const injections = await prepareGenerateImages(context);
-    const graph = new GraphAI(beat_graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent }, options);
+    const graph = new GraphAI(beat_graph_data, {
+        ...vanillaAgents,
+        imageGoogleAgent,
+        movieGoogleAgent,
+        movieReplicateAgent,
+        imageOpenaiAgent,
+        mediaMockAgent,
+        fileWriteAgent,
+        openAIAgent,
+        anthropicAgent,
+    }, options);
     Object.keys(injections).forEach((key) => {
         if ("outputStudioFilePath" !== key) {
             graph.injectValue(key, injections[key]);
package/lib/actions/movie.d.ts CHANGED
@@ -1,5 +1,5 @@
-import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
-export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
+import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
+export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption) => {
     videoId: string;
     videoPart: string;
 };
package/lib/actions/movie.js CHANGED
@@ -1,12 +1,12 @@
 import { GraphAILogger, assert } from "graphai";
-import { mulmoTransitionSchema } from "../types/index.js";
+import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
-export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
+export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption) => {
     const videoId = `v${inputIndex}`;
     const videoFilters = [];
     // Handle different media types
@@ -19,9 +19,19 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
         videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
     }
     // Common filters for all media types
-    videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
-    // In case of the aspect ratio mismatch, we fill the extra space with black color.
-    `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
+    videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS");
+    // Apply scaling based on fill option
+    if (fillOption.style === "aspectFill") {
+        // For aspect fill: scale to fill the canvas completely, cropping if necessary
+        videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`, `crop=${canvasInfo.width}:${canvasInfo.height}`);
+    }
+    else {
+        // For aspect fit: scale to fit within canvas, padding if necessary
+        videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+        // In case of the aspect ratio mismatch, we fill the extra space with black color.
+        `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
+    }
+    videoFilters.push("setsar=1", "format=yuv420p");
     return {
         videoId,
         videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
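Note: for a 1280x720 canvas and a 5-second beat (ignoring the media-type-specific filters pushed earlier in the function), the two branches above produce these filter chains:

// aspectFill: cover the canvas completely, cropping any overflow
//   trim=duration=5,fps=30,setpts=PTS-STARTPTS,scale=w=1280:h=720:force_original_aspect_ratio=increase,crop=1280:720,setsar=1,format=yuv420p
// aspectFit (else branch): fit inside the canvas, letterboxing with black padding
//   trim=duration=5,fps=30,setpts=PTS-STARTPTS,scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,setsar=1,format=yuv420p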
@@ -95,7 +105,12 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, capt
         return 0;
     })();
     const duration = studioBeat.duration + extraPadding;
-    const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
+    // Get fillOption from merged imageParams (global + beat-specific)
+    const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+    const beatFillOption = beat.movieParams?.fillOption;
+    const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+    const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+    const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption);
     ffmpegContext.filterComplex.push(videoPart);
     if (caption && studioBeat.captionFile) {
         const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
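Note: the spread order gives beat-level settings the highest precedence, and spreading undefined is a no-op, so absent layers simply fall through. A minimal sketch (the schema default is not shown in this diff; "aspectFit" is assumed from the else-branch naming):

const defaultFillOption = { style: "aspectFit" };  // assumed schema default
const globalFillOption = { style: "aspectFill" };  // presentationStyle.movieParams.fillOption
const beatFillOption = undefined;                  // this beat has no override
const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
// => { style: "aspectFill" }; a beat-level fillOption would override both layers.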
package/lib/agents/add_bgm_agent.js CHANGED
@@ -9,7 +9,7 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
     const totalDuration = speechDuration + introPadding + outroPadding;
     GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
     const ffmpegContext = FfmpegContextInit();
-    const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile);
+    const musicInputIndex = FfmpegContextAddInput(ffmpegContext, musicFile, ["-stream_loop", "-1"]);
     const voiceInputIndex = FfmpegContextAddInput(ffmpegContext, voiceFile);
     ffmpegContext.filterComplex.push(`[${musicInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.bgmVolume}[music]`);
     ffmpegContext.filterComplex.push(`[${voiceInputIndex}:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo, volume=${context.presentationStyle.audioParams.audioVolume}, adelay=${introPadding * 1000}|${introPadding * 1000}[voice]`);
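Note: -stream_loop -1 is a standard ffmpeg input option that loops the input it precedes indefinitely, so background music shorter than the narration no longer goes silent partway through the mix. The change is equivalent to:

// before: ffmpeg -i music.mp3 -i voice.mp3 ...
// after:  ffmpeg -stream_loop -1 -i music.mp3 -i voice.mp3 ...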
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -1,4 +1,4 @@
-import { assert } from "graphai";
+import { assert, GraphAILogger } from "graphai";
 import { silent60secPath } from "../utils/file.js";
 import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 const getMovieDulation = async (beat) => {
@@ -77,7 +77,8 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
             const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
             // Yes, the current beat has spilled over audio.
             const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-            if (beatsTotalDuration > audioDuration) {
+            if (beatsTotalDuration > audioDuration + 0.01) {
+                // 0.01 is a tolerance to avoid floating point precision issues
                 group.reduce((remaining, idx, iGroup) => {
                     if (remaining >= groupBeatsDurations[iGroup]) {
                         return remaining - groupBeatsDurations[iGroup];
@@ -88,7 +89,9 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
             }
             else {
                 // Last beat gets the rest of the audio.
-                groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+                if (audioDuration > beatsTotalDuration) {
+                    groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+                }
             }
             beatDurations.push(...groupBeatsDurations);
         }
@@ -98,7 +101,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
             // padding is the amount of audio padding specified in the script.
             const padding = getPadding(context, beat, index);
             // totalPadding is the amount of audio padding to be added to the audio file.
-            const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+            const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
             const beatDuration = audioDuration + totalPadding;
             beatDurations.push(beatDuration);
             if (totalPadding > 0) {
@@ -124,7 +127,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
     // We cannot reuse longSilentId. We need to explicitly split it for each beat.
     const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
     if (silentIds.length > 0) {
-        const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+        const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
         ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
     }
     const inputIds = [];
@@ -142,6 +145,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         }
     });
     assert(silentIds.length === 0, "silentIds.length !== 0");
+    GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
     // Finally, combine all audio files.
     ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
     await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
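Note: a minimal illustration of why the 0.01 tolerance (and the rounding of totalPadding above) is needed; summing per-beat durations in binary floating point accumulates error:

const durations = [0.1, 0.2, 0.3];
const total = durations.reduce((a, b) => a + b, 0);
console.log(total);              // 0.6000000000000001
console.log(total > 0.6);        // true: would wrongly take the spill-over branch
console.log(total > 0.6 + 0.01); // false: within tolerance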
package/lib/agents/index.d.ts CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -13,4 +14,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -14,4 +15,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/movie_replicate_agent.d.ts ADDED
@@ -0,0 +1,23 @@
+import type { AgentFunction, AgentFunctionInfo } from "graphai";
+export declare const getAspectRatio: (canvasSize: {
+    width: number;
+    height: number;
+}) => string;
+export type MovieReplicateConfig = {
+    apiKey?: string;
+};
+export declare const movieReplicateAgent: AgentFunction<{
+    model: `${string}/${string}` | undefined;
+    canvasSize: {
+        width: number;
+        height: number;
+    };
+    duration?: number;
+}, {
+    buffer: Buffer;
+}, {
+    prompt: string;
+    imagePath?: string;
+}, MovieReplicateConfig>;
+declare const movieReplicateAgentInfo: AgentFunctionInfo;
+export default movieReplicateAgentInfo;
package/lib/agents/movie_replicate_agent.js ADDED
@@ -0,0 +1,93 @@
+import { readFileSync } from "fs";
+import { GraphAILogger } from "graphai";
+import Replicate from "replicate";
+async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
+    const replicate = new Replicate({
+        auth: apiKey,
+    });
+    const input = {
+        prompt: prompt,
+        duration: duration,
+        image: undefined,
+        start_image: undefined,
+        aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
+        // resolution: "720p", // only for bytedance/seedance-1-lite
+        // fps: 24, // only for bytedance/seedance-1-lite
+        // camera_fixed: false, // only for bytedance/seedance-1-lite
+        // mode: "standard" // only for kwaivgi/kling-v2.1
+        // negative_prompt: "" // only for kwaivgi/kling-v2.1
+    };
+    // Add image if provided (for image-to-video generation)
+    if (imagePath) {
+        const buffer = readFileSync(imagePath);
+        const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
+        if (model === "kwaivgi/kling-v2.1") {
+            input.start_image = base64Image;
+        }
+        else {
+            input.image = base64Image;
+        }
+    }
+    try {
+        const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
+        // Download the generated video
+        if (output && typeof output === "object" && "url" in output) {
+            const videoUrl = output.url();
+            const videoResponse = await fetch(videoUrl);
+            if (!videoResponse.ok) {
+                throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
+            }
+            const arrayBuffer = await videoResponse.arrayBuffer();
+            return Buffer.from(arrayBuffer);
+        }
+        return undefined;
+    }
+    catch (error) {
+        GraphAILogger.info("Replicate generation error:", error);
+        throw error;
+    }
+}
+export const getAspectRatio = (canvasSize) => {
+    if (canvasSize.width > canvasSize.height) {
+        return "16:9";
+    }
+    else if (canvasSize.width < canvasSize.height) {
+        return "9:16";
+    }
+    else {
+        return "1:1";
+    }
+};
+export const movieReplicateAgent = async ({ namedInputs, params, config }) => {
+    const { prompt, imagePath } = namedInputs;
+    const aspectRatio = getAspectRatio(params.canvasSize);
+    const duration = params.duration ?? 5;
+    const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
+    if (!apiKey) {
+        throw new Error("REPLICATE_API_TOKEN environment variable is required");
+    }
+    try {
+        const buffer = await generateMovie(params.model, apiKey, prompt, imagePath, aspectRatio, duration);
+        if (buffer) {
+            return { buffer };
+        }
+        throw new Error("ERROR: generateMovie returned undefined");
+    }
+    catch (error) {
+        GraphAILogger.info("Failed to generate movie:", error.message);
+        throw error;
+    }
+};
+const movieReplicateAgentInfo = {
+    name: "movieReplicateAgent",
+    agent: movieReplicateAgent,
+    mock: movieReplicateAgent,
+    samples: [],
+    description: "Replicate Movie agent using seedance-1-lite",
+    category: ["movie"],
+    author: "Receptron Team",
+    repository: "https://github.com/receptron/mulmocast-cli/",
+    license: "MIT",
+    environmentVariables: ["REPLICATE_API_TOKEN"],
+};
+export default movieReplicateAgentInfo;
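Note: a minimal direct-invocation sketch of the new agent. In normal operation GraphAI wires these inputs through the movieGenerator node; the import path, prompt, and output file name here are illustrative, and fetch requires Node 18+:

import { writeFileSync } from "fs";
import { movieReplicateAgent } from "mulmocast/lib/agents/movie_replicate_agent.js";

const { buffer } = await movieReplicateAgent({
    namedInputs: { prompt: "A paper plane gliding over a city at dusk" },
    params: { model: "bytedance/seedance-1-lite", canvasSize: { width: 1280, height: 720 }, duration: 5 },
    config: { apiKey: process.env.REPLICATE_API_TOKEN },
});
writeFileSync("beat.mp4", buffer);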
package/lib/cli/commands/tool/scripting/builder.d.ts CHANGED
@@ -5,6 +5,8 @@ export declare const builder: (yargs: Argv) => Argv<{
     b: string | undefined;
 } & {
     u: string[] | never[];
+} & {
+    "input-file": string | undefined;
 } & {
     i: boolean | undefined;
 } & {
@@ -14,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
 } & {
     s: string;
 } & {
-    llm: "openAI" | "anthropic" | "gemini" | "groq" | undefined;
+    llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
 } & {
     llm_model: string | undefined;
 }>;
package/lib/cli/commands/tool/scripting/builder.js CHANGED
@@ -22,6 +22,11 @@ export const builder = (yargs) => {
         default: [],
         type: "array",
         string: true,
+    })
+        .option("input-file", {
+        description: "input file name",
+        demandOption: false,
+        type: "string",
     })
         .option("i", {
         alias: "interactive",
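Note: a standalone sketch of how the new flag parses (assuming yargs v17; yargs also exposes a camelCase alias by default):

import yargs from "yargs";

const argv = yargs(["--input-file", "notes.txt"])
    .option("input-file", { description: "input file name", type: "string" })
    .parseSync();
console.log(argv["input-file"]); // "notes.txt"
console.log(argv.inputFile);     // "notes.txt" (camel-case expansion)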
package/lib/cli/commands/tool/scripting/handler.d.ts CHANGED
@@ -6,6 +6,7 @@ export declare const handler: (argv: ToolCliArgs<{
     u?: string[];
     i?: boolean;
     t?: string;
+    "input-file"?: string;
     c?: string;
     s?: string;
     llm?: LLM;
package/lib/cli/commands/tool/scripting/handler.js CHANGED
@@ -1,18 +1,23 @@
 import { getBaseDirPath, getFullPath } from "../../../../utils/file.js";
 import { outDirName, cacheDirName } from "../../../../utils/const.js";
 import { getUrlsIfNeeded, selectTemplate } from "../../../../utils/inquirer.js";
-import { createMulmoScriptFromUrl } from "../../../../tools/create_mulmo_script_from_url.js";
+import { createMulmoScriptFromUrl, createMulmoScriptFromFile } from "../../../../tools/create_mulmo_script_from_url.js";
 import { createMulmoScriptInteractively } from "../../../../tools/create_mulmo_script_interactively.js";
 import { setGraphAILogger } from "../../../../cli/helpers.js";
 export const handler = async (argv) => {
-    const { o: outdir, b: basedir, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
+    const { o: outdir, b: basedir, "input-file": inputFile, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
     let { t: template } = argv;
     const urls = argv.u || [];
     const baseDirPath = getBaseDirPath(basedir);
     const outDirPath = getFullPath(baseDirPath, outdir ?? outDirName);
     const cacheDirPath = getFullPath(outDirPath, cache ?? cacheDirName);
     if (!template) {
-        template = await selectTemplate();
+        if (interactive) {
+            template = await selectTemplate();
+        }
+        else {
+            template = "business";
+        }
     }
     setGraphAILogger(verbose, {
         baseDirPath,
@@ -22,13 +27,17 @@ export const handler = async (argv) => {
         urls,
         interactive,
         filename,
+        inputFile,
         llm,
         llm_model,
     });
-    const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm };
+    const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm, verbose };
     if (interactive) {
        await createMulmoScriptInteractively(context);
     }
+    if (inputFile) {
+        await createMulmoScriptFromFile(inputFile, context);
+    }
     else {
         context.urls = await getUrlsIfNeeded(urls);
         await createMulmoScriptFromUrl(context);