mulmocast 0.0.22 → 0.0.23

This diff compares the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (33)
  1. package/README.md +5 -0
  2. package/lib/actions/captions.js +1 -1
  3. package/lib/actions/images.d.ts +7 -3
  4. package/lib/actions/images.js +61 -17
  5. package/lib/actions/movie.d.ts +2 -2
  6. package/lib/actions/movie.js +21 -6
  7. package/lib/agents/combine_audio_files_agent.js +9 -5
  8. package/lib/agents/index.d.ts +2 -1
  9. package/lib/agents/index.js +2 -1
  10. package/lib/agents/movie_replicate_agent.d.ts +23 -0
  11. package/lib/agents/movie_replicate_agent.js +93 -0
  12. package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
  13. package/lib/cli/commands/tool/scripting/builder.js +5 -0
  14. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
  15. package/lib/cli/commands/tool/scripting/handler.js +13 -4
  16. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  17. package/lib/cli/helpers.js +8 -3
  18. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  19. package/lib/methods/mulmo_presentation_style.js +21 -2
  20. package/lib/methods/mulmo_studio_context.js +1 -1
  21. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
  22. package/lib/tools/create_mulmo_script_from_url.js +129 -43
  23. package/lib/types/schema.d.ts +1123 -163
  24. package/lib/types/schema.js +38 -1
  25. package/lib/types/type.d.ts +9 -2
  26. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  27. package/lib/utils/ffmpeg_utils.js +2 -2
  28. package/lib/utils/preprocess.d.ts +41 -6
  29. package/lib/utils/utils.d.ts +1 -0
  30. package/lib/utils/utils.js +5 -0
  31. package/package.json +3 -2
  32. package/scripts/templates/presentation.json +123 -0
  33. package/scripts/templates/presentation.json~ +119 -0
package/README.md CHANGED
@@ -103,6 +103,11 @@ GOOGLE_PROJECT_ID=your_google_project_id
 
  See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
+ #### (Optional) For Movie models
+ ```bash
+ REPLICATE_API_TOKEN=your_replicate_api_key
+ ```
+
  #### (Optional) For TTS models
  ```bash
  # For Nijivoice TTS
package/lib/actions/captions.js CHANGED
@@ -61,7 +61,7 @@ const graph_data = {
  },
  };
  export const captions = async (context, callbacks) => {
- if (context.caption) {
+ if (MulmoStudioContextMethods.getCaption(context)) {
  try {
  MulmoStudioContextMethods.setSessionState(context, "caption", true);
  const graph = new GraphAI(graph_data, { ...vanillaAgents });
package/lib/actions/images.d.ts CHANGED
@@ -8,8 +8,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
  imageRefs: Record<string, string>;
  }) => Promise<{
  imageParams: {
- model?: string | undefined;
  style?: string | undefined;
+ model?: string | undefined;
  moderation?: string | undefined;
  images?: Record<string, {
  type: "image";
@@ -32,16 +32,18 @@ export declare const imagePreprocessAgent: (namedInputs: {
  imagePath: string | undefined;
  referenceImage: string | undefined;
  htmlPrompt?: undefined;
+ htmlSystemPrompt?: undefined;
  } | {
  imagePath: string;
  htmlPrompt: string;
+ htmlSystemPrompt: string[];
  } | {
  imagePath: string;
  images: string[];
  imageFromMovie: boolean;
  imageParams: {
- model?: string | undefined;
  style?: string | undefined;
+ model?: string | undefined;
  moderation?: string | undefined;
  images?: Record<string, {
  type: "image";
@@ -62,11 +64,12 @@ export declare const imagePreprocessAgent: (namedInputs: {
  };
  movieFile: string | undefined;
  htmlPrompt?: undefined;
+ htmlSystemPrompt?: undefined;
  } | {
  images: string[];
  imageParams: {
- model?: string | undefined;
  style?: string | undefined;
+ model?: string | undefined;
  moderation?: string | undefined;
  images?: Record<string, {
  type: "image";
@@ -90,6 +93,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
  referenceImage: string;
  prompt: string;
  htmlPrompt?: undefined;
+ htmlSystemPrompt?: undefined;
  }>;
  export declare const imagePluginAgent: (namedInputs: {
  context: MulmoStudioContext;
package/lib/actions/images.js CHANGED
@@ -4,10 +4,11 @@ import { GraphAI, GraphAILogger } from "graphai";
  import { TaskManager } from "graphai/lib/task_manager.js";
  import * as agents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
+ import { anthropicAgent } from "@graphai/anthropic_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
+ import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { findImagePlugin } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
@@ -15,7 +16,6 @@ import { defaultOpenAIImageModel } from "../utils/const.js";
  import { renderHTMLToImage } from "../utils/markdown.js";
  const vanillaAgents = agents.default ?? agents;
  dotenv.config();
- // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
  import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
  const htmlStyle = (context, beat) => {
@@ -43,7 +43,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
  }
  if (beat.htmlPrompt) {
  const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
- return { imagePath, htmlPrompt };
+ const htmlSystemPrompt = [
+ "Based on the provided information, create a single slide HTML page using Tailwind CSS.",
+ `The view port size is ${context.presentationStyle.canvasSize.width}x${context.presentationStyle.canvasSize.height}. Make sure the HTML fits within the view port.`,
+ "If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
+ "Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
+ "Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
+ "If data is provided, use it effectively to populate the slide.",
+ ];
+ return { imagePath, htmlPrompt, htmlSystemPrompt };
  }
  // images for "edit_image"
  const images = (() => {
@@ -85,6 +93,7 @@ const beat_graph_data = {
  nodes: {
  context: {},
  imageAgentInfo: {},
+ htmlImageAgentInfo: {},
  movieAgentInfo: {},
  imageRefs: {},
  beat: {},
@@ -113,25 +122,21 @@
  htmlImageAgent: {
  if: ":preprocessor.htmlPrompt",
  defaultValue: {},
- agent: "openAIAgent",
+ agent: ":htmlImageAgentInfo.agent",
+ params: {
+ mode: ":htmlImageAgentInfo.model",
+ },
  inputs: {
  prompt: ":preprocessor.htmlPrompt",
- system: [
- "Based on the provided information, create a single slide HTML page using Tailwind CSS.",
- "If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
- "Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
- "Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
- "If data is provided, use it effectively to populate the slide.",
- ],
+ system: ":preprocessor.htmlSystemPrompt",
  },
  },
  htmlImageGenerator: {
  if: ":preprocessor.htmlPrompt",
  defaultValue: {},
  agent: htmlImageGeneratorAgent,
- // console: { before: true, after: true },
  inputs: {
- html: ":htmlImageAgent.text.codeBlock()",
+ html: ":htmlImageAgent.text.codeBlockOrRaw()",
  canvasSize: ":context.presentationStyle.canvasSize",
  file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
  mulmoContext: ":context", // for fileCacheAgentFilter
@@ -213,6 +218,7 @@ const graph_data = {
  nodes: {
  context: {},
  imageAgentInfo: {},
+ htmlImageAgentInfo: {},
  movieAgentInfo: {},
  outputStudioFilePath: {},
  imageRefs: {},
@@ -222,6 +228,7 @@
  rows: ":context.studio.script.beats",
  context: ":context",
  imageAgentInfo: ":imageAgentInfo",
+ htmlImageAgentInfo: ":htmlImageAgentInfo",
  movieAgentInfo: ":movieAgentInfo",
  imageRefs: ":imageRefs",
  },
@@ -268,7 +275,6 @@
  },
  },
  writeOutput: {
- // console: { before: true },
  agent: "fileWriteAgent",
  inputs: {
  file: ":outputStudioFilePath",
@@ -370,13 +376,28 @@ const prepareGenerateImages = async (context) => {
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
  mkdir(imageProjectDirPath);
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
+ const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
  const imageRefs = await getImageRefs(context);
+ // Determine movie agent based on provider
+ const getMovieAgent = () => {
+ if (context.dryRun)
+ return "mediaMockAgent";
+ const provider = context.presentationStyle.movieParams?.provider ?? "google";
+ switch (provider) {
+ case "replicate":
+ return "movieReplicateAgent";
+ case "google":
+ default:
+ return "movieGoogleAgent";
+ }
+ };
  GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
  const injections = {
  context,
  imageAgentInfo,
+ htmlImageAgentInfo,
  movieAgentInfo: {
- agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
+ agent: getMovieAgent(),
  },
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
  imageRefs,
@@ -384,6 +405,9 @@
  return injections;
  };
  const getConcurrency = (context) => {
+ if (context.presentationStyle.movieParams?.provider === "replicate") {
+ return 4;
+ }
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
  if (imageAgentInfo.provider === "openai") {
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -396,7 +420,17 @@ const getConcurrency = (context) => {
  const generateImages = async (context, callbacks) => {
  const options = await graphOption(context);
  const injections = await prepareGenerateImages(context);
- const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent, openAIAgent }, options);
+ const graph = new GraphAI(graph_data, {
+ ...vanillaAgents,
+ imageGoogleAgent,
+ movieGoogleAgent,
+ movieReplicateAgent,
+ imageOpenaiAgent,
+ mediaMockAgent,
+ fileWriteAgent,
+ openAIAgent,
+ anthropicAgent,
+ }, options);
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
@@ -423,7 +457,17 @@ export const images = async (context, callbacks) => {
  export const generateBeatImage = async (index, context, callbacks) => {
  const options = await graphOption(context);
  const injections = await prepareGenerateImages(context);
- const graph = new GraphAI(beat_graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent, openAIAgent }, options);
+ const graph = new GraphAI(beat_graph_data, {
+ ...vanillaAgents,
+ imageGoogleAgent,
+ movieGoogleAgent,
+ movieReplicateAgent,
+ imageOpenaiAgent,
+ mediaMockAgent,
+ fileWriteAgent,
+ openAIAgent,
+ anthropicAgent,
+ }, options);
  Object.keys(injections).forEach((key) => {
  if ("outputStudioFilePath" !== key) {
  graph.injectValue(key, injections[key]);
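
Taken together, the images.js changes route movie generation through a provider switch: `movieParams.provider` in the presentation style now selects between `movieGoogleAgent` (the default) and the new `movieReplicateAgent`, and `getConcurrency` caps Replicate runs at 4 concurrent requests. A minimal sketch of a presentation-style fragment that opts into the Replicate path; only `movieParams.provider` and `canvasSize` are confirmed by this diff, the rest of the script shape is assumed:

```js
// Hypothetical presentationStyle fragment (illustrative values).
const presentationStyle = {
  canvasSize: { width: 1280, height: 720 },
  movieParams: {
    provider: "replicate", // beats with movie prompts now go to movieReplicateAgent
    // omitting movieParams (or provider: "google") keeps the previous behavior
  },
};
```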
package/lib/actions/movie.d.ts CHANGED
@@ -1,5 +1,5 @@
- import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
- export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
+ import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
+ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption) => {
  videoId: string;
  videoPart: string;
  };
package/lib/actions/movie.js CHANGED
@@ -1,12 +1,12 @@
  import { GraphAILogger, assert } from "graphai";
- import { mulmoTransitionSchema } from "../types/index.js";
+ import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
- export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
+ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption) => {
  const videoId = `v${inputIndex}`;
  const videoFilters = [];
  // Handle different media types
@@ -19,9 +19,19 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
  videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
  }
  // Common filters for all media types
- videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
- // In case of the aspect ratio mismatch, we fill the extra space with black color.
- `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
+ videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS");
+ // Apply scaling based on fill option
+ if (fillOption.style === "aspectFill") {
+ // For aspect fill: scale to fill the canvas completely, cropping if necessary
+ videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`, `crop=${canvasInfo.width}:${canvasInfo.height}`);
+ }
+ else {
+ // For aspect fit: scale to fit within canvas, padding if necessary
+ videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+ // In case of the aspect ratio mismatch, we fill the extra space with black color.
+ `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
+ }
+ videoFilters.push("setsar=1", "format=yuv420p");
  return {
  videoId,
  videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
@@ -95,7 +105,12 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, capt
  return 0;
  })();
  const duration = studioBeat.duration + extraPadding;
- const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
+ // Get fillOption from merged imageParams (global + beat-specific)
+ const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+ const beatFillOption = beat.movieParams?.fillOption;
+ const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+ const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+ const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption);
  ffmpegContext.filterComplex.push(videoPart);
  if (caption && studioBeat.captionFile) {
  const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
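
The new `fillOption` branch produces one of two ffmpeg filter chains. Evaluated for a hypothetical 1280x720 canvas, they look like this; the filter strings follow directly from the code above, only the canvas size is an example:

```js
const canvasInfo = { width: 1280, height: 720 };

// aspectFill: upscale past the canvas, then crop the overflow on the long axis.
const aspectFill = [
  `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`,
  `crop=${canvasInfo.width}:${canvasInfo.height}`,
].join(",");
// -> "scale=w=1280:h=720:force_original_aspect_ratio=increase,crop=1280:720"

// aspectFit (the schema default): downscale into the canvas, then letterbox/pillarbox with black.
const aspectFit = [
  `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
  `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
].join(",");
// -> "scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black"
```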
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -1,4 +1,4 @@
- import { assert } from "graphai";
+ import { assert, GraphAILogger } from "graphai";
  import { silent60secPath } from "../utils/file.js";
  import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
  const getMovieDulation = async (beat) => {
@@ -77,7 +77,8 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
  // Yes, the current beat has spilled over audio.
  const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
- if (beatsTotalDuration > audioDuration) {
+ if (beatsTotalDuration > audioDuration + 0.01) {
+ // 0.01 is a tolerance to avoid floating point precision issues
  group.reduce((remaining, idx, iGroup) => {
  if (remaining >= groupBeatsDurations[iGroup]) {
  return remaining - groupBeatsDurations[iGroup];
@@ -88,7 +89,9 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  }
  else {
  // Last beat gets the rest of the audio.
- groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+ if (audioDuration > beatsTotalDuration) {
+ groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+ }
  }
  beatDurations.push(...groupBeatsDurations);
  }
@@ -98,7 +101,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  // padding is the amount of audio padding specified in the script.
  const padding = getPadding(context, beat, index);
  // totalPadding is the amount of audio padding to be added to the audio file.
- const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+ const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
  const beatDuration = audioDuration + totalPadding;
  beatDurations.push(beatDuration);
  if (totalPadding > 0) {
@@ -124,7 +127,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  // We cannot reuse longSilentId. We need to explicitly split it for each beat.
  const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
  if (silentIds.length > 0) {
- const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+ const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
  ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
  }
  const inputIds = [];
@@ -142,6 +145,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  }
  });
  assert(silentIds.length === 0, "silentIds.length !== 0");
+ GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
  // Finally, combine all audio files.
  ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
  await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
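
The new `+ 0.01` comparison guards against ordinary floating-point accumulation when beat durations are summed. A quick standalone illustration of the failure mode it avoids (plain JavaScript arithmetic, not package code):

```js
const durations = [0.1, 0.2, 0.3]; // illustrative beat durations in seconds
const total = durations.reduce((a, b) => a + b, 0);
console.log(total);              // 0.6000000000000001 (IEEE 754 rounding)
console.log(total > 0.6);        // true  — would falsely trigger the spill-over branch
console.log(total > 0.6 + 0.01); // false — the tolerance absorbs the rounding error
```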
package/lib/agents/index.d.ts CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
  import imageOpenaiAgent from "./image_openai_agent.js";
  import tavilySearchAgent from "./tavily_agent.js";
  import movieGoogleAgent from "./movie_google_agent.js";
+ import movieReplicateAgent from "./movie_replicate_agent.js";
  import mediaMockAgent from "./media_mock_agent.js";
  import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -13,4 +14,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
  import { textInputAgent } from "@graphai/input_agents";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
  import imageOpenaiAgent from "./image_openai_agent.js";
  import tavilySearchAgent from "./tavily_agent.js";
  import movieGoogleAgent from "./movie_google_agent.js";
+ import movieReplicateAgent from "./movie_replicate_agent.js";
  import mediaMockAgent from "./media_mock_agent.js";
  import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -14,4 +15,4 @@ import { textInputAgent } from "@graphai/input_agents";
  import { openAIAgent } from "@graphai/openai_agent";
  // import * as vanilla from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/movie_replicate_agent.d.ts ADDED
@@ -0,0 +1,23 @@
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
+ export declare const getAspectRatio: (canvasSize: {
+ width: number;
+ height: number;
+ }) => string;
+ export type MovieReplicateConfig = {
+ apiKey?: string;
+ };
+ export declare const movieReplicateAgent: AgentFunction<{
+ model: `${string}/${string}` | undefined;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
+ duration?: number;
+ }, {
+ buffer: Buffer;
+ }, {
+ prompt: string;
+ imagePath?: string;
+ }, MovieReplicateConfig>;
+ declare const movieReplicateAgentInfo: AgentFunctionInfo;
+ export default movieReplicateAgentInfo;
package/lib/agents/movie_replicate_agent.js ADDED
@@ -0,0 +1,93 @@
+ import { readFileSync } from "fs";
+ import { GraphAILogger } from "graphai";
+ import Replicate from "replicate";
+ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
+ const replicate = new Replicate({
+ auth: apiKey,
+ });
+ const input = {
+ prompt: prompt,
+ duration: duration,
+ image: undefined,
+ start_image: undefined,
+ aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
+ // resolution: "720p", // only for bytedance/seedance-1-lite
+ // fps: 24, // only for bytedance/seedance-1-lite
+ // camera_fixed: false, // only for bytedance/seedance-1-lite
+ // mode: "standard" // only for kwaivgi/kling-v2.1
+ // negative_prompt: "" // only for kwaivgi/kling-v2.1
+ };
+ // Add image if provided (for image-to-video generation)
+ if (imagePath) {
+ const buffer = readFileSync(imagePath);
+ const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
+ if (model === "kwaivgi/kling-v2.1") {
+ input.start_image = base64Image;
+ }
+ else {
+ input.image = base64Image;
+ }
+ }
+ try {
+ const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
+ // Download the generated video
+ if (output && typeof output === "object" && "url" in output) {
+ const videoUrl = output.url();
+ const videoResponse = await fetch(videoUrl);
+ if (!videoResponse.ok) {
+ throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
+ }
+ const arrayBuffer = await videoResponse.arrayBuffer();
+ return Buffer.from(arrayBuffer);
+ }
+ return undefined;
+ }
+ catch (error) {
+ GraphAILogger.info("Replicate generation error:", error);
+ throw error;
+ }
+ }
+ export const getAspectRatio = (canvasSize) => {
+ if (canvasSize.width > canvasSize.height) {
+ return "16:9";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "9:16";
+ }
+ else {
+ return "1:1";
+ }
+ };
+ export const movieReplicateAgent = async ({ namedInputs, params, config }) => {
+ const { prompt, imagePath } = namedInputs;
+ const aspectRatio = getAspectRatio(params.canvasSize);
+ const duration = params.duration ?? 5;
+ const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
+ if (!apiKey) {
+ throw new Error("REPLICATE_API_TOKEN environment variable is required");
+ }
+ try {
+ const buffer = await generateMovie(params.model, apiKey, prompt, imagePath, aspectRatio, duration);
+ if (buffer) {
+ return { buffer };
+ }
+ throw new Error("ERROR: generateMovie returned undefined");
+ }
+ catch (error) {
+ GraphAILogger.info("Failed to generate movie:", error.message);
+ throw error;
+ }
+ };
+ const movieReplicateAgentInfo = {
+ name: "movieReplicateAgent",
+ agent: movieReplicateAgent,
+ mock: movieReplicateAgent,
+ samples: [],
+ description: "Replicate Movie agent using seedance-1-lite",
+ category: ["movie"],
+ author: "Receptron Team",
+ repository: "https://github.com/receptron/mulmocast-cli/",
+ license: "MIT",
+ environmentVariables: ["REPLICATE_API_TOKEN"],
+ };
+ export default movieReplicateAgentInfo;
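
A usage sketch for the new agent, calling the exported function directly rather than through a GraphAI graph. The argument shape is taken from the `.d.ts` above; the deep import specifier, the prompt, and the output filename are illustrative assumptions:

```js
import { writeFile } from "fs/promises";
// Deep import specifier is an assumption; within the package it is "./movie_replicate_agent.js".
import { movieReplicateAgent } from "mulmocast/lib/agents/movie_replicate_agent.js";

// The implementation only reads namedInputs, params, and config, so a direct
// call with those fields works; inside a graph, GraphAI supplies them.
const { buffer } = await movieReplicateAgent({
  namedInputs: { prompt: "A slow aerial shot over a foggy coastline at dawn" }, // illustrative prompt
  params: {
    model: "bytedance/seedance-1-lite", // also the fallback when model is undefined
    canvasSize: { width: 1280, height: 720 }, // maps to "16:9" via getAspectRatio
    duration: 5, // matches the agent's default
  },
  config: { apiKey: process.env.REPLICATE_API_TOKEN },
});
await writeFile("beat.mp4", buffer); // hypothetical output path
```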
package/lib/cli/commands/tool/scripting/builder.d.ts CHANGED
@@ -5,6 +5,8 @@ export declare const builder: (yargs: Argv) => Argv<{
  b: string | undefined;
  } & {
  u: string[] | never[];
+ } & {
+ "input-file": string | undefined;
  } & {
  i: boolean | undefined;
  } & {
@@ -14,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
  } & {
  s: string;
  } & {
- llm: "openAI" | "anthropic" | "gemini" | "groq" | undefined;
+ llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
  } & {
  llm_model: string | undefined;
  }>;
package/lib/cli/commands/tool/scripting/builder.js CHANGED
@@ -22,6 +22,11 @@ export const builder = (yargs) => {
  default: [],
  type: "array",
  string: true,
+ })
+ .option("input-file", {
+ description: "input file name",
+ demandOption: false,
+ type: "string",
  })
  .option("i", {
  alias: "interactive",
package/lib/cli/commands/tool/scripting/handler.d.ts CHANGED
@@ -6,6 +6,7 @@ export declare const handler: (argv: ToolCliArgs<{
  u?: string[];
  i?: boolean;
  t?: string;
+ "input-file"?: string;
  c?: string;
  s?: string;
  llm?: LLM;
package/lib/cli/commands/tool/scripting/handler.js CHANGED
@@ -1,18 +1,23 @@
  import { getBaseDirPath, getFullPath } from "../../../../utils/file.js";
  import { outDirName, cacheDirName } from "../../../../utils/const.js";
  import { getUrlsIfNeeded, selectTemplate } from "../../../../utils/inquirer.js";
- import { createMulmoScriptFromUrl } from "../../../../tools/create_mulmo_script_from_url.js";
+ import { createMulmoScriptFromUrl, createMulmoScriptFromFile } from "../../../../tools/create_mulmo_script_from_url.js";
  import { createMulmoScriptInteractively } from "../../../../tools/create_mulmo_script_interactively.js";
  import { setGraphAILogger } from "../../../../cli/helpers.js";
  export const handler = async (argv) => {
- const { o: outdir, b: basedir, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
+ const { o: outdir, b: basedir, "input-file": inputFile, v: verbose, i: interactive, c: cache, s: filename, llm, llm_model } = argv;
  let { t: template } = argv;
  const urls = argv.u || [];
  const baseDirPath = getBaseDirPath(basedir);
  const outDirPath = getFullPath(baseDirPath, outdir ?? outDirName);
  const cacheDirPath = getFullPath(outDirPath, cache ?? cacheDirName);
  if (!template) {
- template = await selectTemplate();
+ if (interactive) {
+ template = await selectTemplate();
+ }
+ else {
+ template = "business";
+ }
  }
  setGraphAILogger(verbose, {
  baseDirPath,
@@ -22,13 +27,17 @@ export const handler = async (argv) => {
  urls,
  interactive,
  filename,
+ inputFile,
  llm,
  llm_model,
  });
- const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm };
+ const context = { outDirPath, templateName: template, urls, filename: filename, cacheDirPath, llm_model, llm, verbose };
  if (interactive) {
  await createMulmoScriptInteractively(context);
  }
+ if (inputFile) {
+ await createMulmoScriptFromFile(inputFile, context);
+ }
  else {
  context.urls = await getUrlsIfNeeded(urls);
  await createMulmoScriptFromUrl(context);
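
The new `--input-file` flag gives the scripting tool a non-interactive path: no URL prompt, and the template falls back to "business". A sketch of the equivalent programmatic call, with the context keys copied from the handler above; the import specifier and all values are placeholders:

```js
// Import specifier is an assumption; within the package the handler uses a relative path.
import { createMulmoScriptFromFile } from "mulmocast/lib/tools/create_mulmo_script_from_url.js";

const context = {
  outDirPath: "./output",
  templateName: "business", // the new non-interactive default
  urls: [],
  filename: "script",
  cacheDirPath: "./output/cache",
  llm: undefined,
  llm_model: undefined,
  verbose: false,
};
await createMulmoScriptFromFile("./notes.txt", context); // hypothetical input file
```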
package/lib/cli/commands/tool/story_to_script/builder.d.ts CHANGED
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
  } & {
  beats_per_scene: number;
  } & {
- llm: "openAI" | "anthropic" | "gemini" | "groq" | undefined;
+ llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
  } & {
  llm_model: string | undefined;
  } & {
package/lib/cli/helpers.js CHANGED
@@ -7,7 +7,7 @@ import { isHttp } from "../utils/utils.js";
  import { createOrUpdateStudioData } from "../utils/preprocess.js";
  import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
  import { translate } from "../actions/translate.js";
- import { mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/schema.js";
+ import { mulmoCaptionParamsSchema, mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/schema.js";
  export const setGraphAILogger = (verbose, logValues) => {
  if (verbose) {
  if (logValues) {
@@ -126,13 +126,18 @@ export const initializeContext = async (argv) => {
  // validate mulmoStudioSchema. skip if __test_invalid__ is true
  const studio = createOrUpdateStudioData(mulmoScript, currentStudio?.mulmoData, fileName);
  const multiLingual = getMultiLingual(outputMultilingualFilePath, studio.beats.length);
+ if (argv.c) {
+ studio.script.captionParams = mulmoCaptionParamsSchema.parse({
+ ...(studio.script.captionParams ?? {}),
+ lang: argv.c,
+ });
+ }
  return {
  studio,
  fileDirs: files,
  force: Boolean(argv.f),
  dryRun: Boolean(argv.dryRun),
  lang: argv.l,
- caption: argv.c,
  sessionState: {
  inSession: {
  audio: false,
@@ -160,7 +165,7 @@
  }
  };
  export const runTranslateIfNeeded = async (context, argv) => {
- if (argv.l || argv.c) {
+ if (argv.l || context.studio.script.captionParams?.lang) {
  GraphAILogger.log("run translate");
  await translate(context);
  }
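
Net effect of these helpers.js changes: the `-c <lang>` caption flag is no longer carried as a transient `caption` field on the context; it is parsed into the script's `captionParams`, and translation keys off that persisted value. A minimal sketch with stand-in values, names from the diff above and the import specifier assumed:

```js
import { mulmoCaptionParamsSchema } from "mulmocast/lib/types/schema.js"; // import path assumed

const argv = { c: "ja", l: undefined }; // e.g. `-c ja` on the command line
const script = {}; // stands in for studio.script

if (argv.c) {
  // the flag is persisted into the script instead of riding along on the context
  script.captionParams = mulmoCaptionParamsSchema.parse({ ...(script.captionParams ?? {}), lang: argv.c });
}
// runTranslateIfNeeded now reads the persisted value, not the raw flag:
const needsTranslate = Boolean(argv.l || script.captionParams?.lang);
console.log(needsTranslate); // true
```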