mulmocast 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +5 -0
  2. package/assets/html/caption.html +2 -0
  3. package/lib/actions/audio.d.ts +2 -2
  4. package/lib/actions/audio.js +8 -7
  5. package/lib/actions/captions.js +7 -5
  6. package/lib/actions/images.d.ts +9 -5
  7. package/lib/actions/images.js +73 -36
  8. package/lib/actions/movie.d.ts +2 -2
  9. package/lib/actions/movie.js +24 -9
  10. package/lib/agents/combine_audio_files_agent.js +9 -5
  11. package/lib/agents/image_openai_agent.d.ts +2 -0
  12. package/lib/agents/image_openai_agent.js +3 -2
  13. package/lib/agents/index.d.ts +2 -1
  14. package/lib/agents/index.js +2 -1
  15. package/lib/agents/movie_replicate_agent.d.ts +23 -0
  16. package/lib/agents/movie_replicate_agent.js +93 -0
  17. package/lib/agents/tts_elevenlabs_agent.js +2 -2
  18. package/lib/agents/tts_nijivoice_agent.js +3 -2
  19. package/lib/agents/tts_openai_agent.js +3 -2
  20. package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
  21. package/lib/cli/commands/tool/scripting/builder.js +5 -0
  22. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
  23. package/lib/cli/commands/tool/scripting/handler.js +13 -4
  24. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  25. package/lib/cli/helpers.js +8 -3
  26. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  27. package/lib/methods/mulmo_presentation_style.js +21 -2
  28. package/lib/methods/mulmo_studio_context.js +1 -1
  29. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
  30. package/lib/tools/create_mulmo_script_from_url.js +129 -43
  31. package/lib/types/schema.d.ts +793 -163
  32. package/lib/types/schema.js +32 -1
  33. package/lib/types/type.d.ts +9 -2
  34. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  35. package/lib/utils/ffmpeg_utils.js +2 -2
  36. package/lib/utils/preprocess.d.ts +29 -6
  37. package/lib/utils/prompt.d.ts +2 -1
  38. package/lib/utils/prompt.js +10 -0
  39. package/lib/utils/utils.d.ts +3 -0
  40. package/lib/utils/utils.js +47 -0
  41. package/package.json +3 -2
  42. package/scripts/templates/presentation.json +123 -0
  43. package/scripts/templates/presentation.json~ +119 -0
package/README.md CHANGED
@@ -103,6 +103,11 @@ GOOGLE_PROJECT_ID=your_google_project_id
 
 See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
+#### (Optional) For Movie models
+```bash
+REPLICATE_API_TOKEN=your_replicate_api_key
+```
+
 #### (Optional) For TTS models
 ```bash
 # For Nijivoice TTS
package/assets/html/caption.html CHANGED
@@ -19,6 +19,7 @@
 }
 .caption {
 /* Text positioned at the bottom */
+width: 80%;
 position: absolute;
 bottom: 0px;
 /* Enable text wrapping */
@@ -34,6 +35,7 @@
 padding-right: 10%;
 padding-top: 4px;
 background: rgba(0, 0, 0, 0.4);
+${styles}
 }
 </style>
 </head>
package/lib/actions/audio.d.ts CHANGED
@@ -3,5 +3,5 @@ import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
 export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
 export declare const audioFilePath: (context: MulmoStudioContext) => string;
-export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
-export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
+export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
+export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
package/lib/actions/audio.js CHANGED
@@ -1,6 +1,5 @@
 import "dotenv/config";
-import { GraphAI } from "graphai";
-import { TaskManager } from "graphai/lib/task_manager.js";
+import { GraphAI, TaskManager } from "graphai";
 import * as agents from "@graphai/vanilla";
 import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
 import addBGMAgent from "../agents/add_bgm_agent.js";
@@ -12,7 +11,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
-import { text2hash, localizedText } from "../utils/utils.js";
+import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
 const vanillaAgents = agents.default ?? agents;
@@ -200,7 +199,7 @@ const audioAgents = {
 addBGMAgent,
 combineAudioFilesAgent,
 };
-export const generateBeatAudio = async (index, context, callbacks) => {
+export const generateBeatAudio = async (index, context, settings, callbacks) => {
 try {
 MulmoStudioContextMethods.setSessionState(context, "audio", true);
 const fileName = MulmoStudioContextMethods.getFileName(context);
@@ -209,8 +208,9 @@ export const generateBeatAudio = async (index, context, callbacks) => {
 const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
 mkdir(outDirPath);
 mkdir(audioSegmentDirPath);
+const config = settings2GraphAIConfig(settings);
 const taskManager = new TaskManager(getConcurrency(context));
-const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
+const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager, config });
 graph.injectValue("__mapIndex", index);
 graph.injectValue("beat", context.studio.script.beats[index]);
 graph.injectValue("studioBeat", context.studio.beats[index]);
@@ -227,7 +227,7 @@ export const generateBeatAudio = async (index, context, callbacks) => {
 MulmoStudioContextMethods.setSessionState(context, "audio", false);
 }
 };
-export const audio = async (context, callbacks) => {
+export const audio = async (context, settings, callbacks) => {
 try {
 MulmoStudioContextMethods.setSessionState(context, "audio", true);
 const fileName = MulmoStudioContextMethods.getFileName(context);
@@ -239,8 +239,9 @@ export const audio = async (context, callbacks) => {
 const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
 mkdir(outDirPath);
 mkdir(audioSegmentDirPath);
+const config = settings2GraphAIConfig(settings);
 const taskManager = new TaskManager(getConcurrency(context));
-const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
+const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
 graph.injectValue("context", context);
 graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
 graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
package/lib/actions/captions.js CHANGED
@@ -1,3 +1,4 @@
+import { mulmoCaptionParamsSchema } from "../types/index.js";
 import { GraphAI, GraphAILogger } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { getHTMLFile, getCaptionImagePath } from "../utils/file.js";
@@ -23,22 +24,23 @@ const graph_data = {
 const { beat, context, index } = namedInputs;
 try {
 MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
-const caption = MulmoStudioContextMethods.getCaption(context);
+const captionParams = mulmoCaptionParamsSchema.parse({ ...context.studio.script.captionParams, ...beat.captionParams });
 const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
 const imagePath = getCaptionImagePath(context, index);
 const template = getHTMLFile("caption");
 const text = (() => {
 const multiLingual = context.multiLingual;
-if (caption && multiLingual) {
-return multiLingual[index].multiLingualTexts[caption].text;
+if (captionParams.lang && multiLingual) {
+return multiLingual[index].multiLingualTexts[captionParams.lang].text;
 }
-GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
+GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${captionParams.lang}`);
 return beat.text;
 })();
 const htmlData = interpolate(template, {
 caption: text,
 width: `${canvasSize.width}`,
 height: `${canvasSize.height}`,
+styles: captionParams.styles.join(";\n"),
 });
 await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
 context.studio.beats[index].captionFile = imagePath;
@@ -61,7 +63,7 @@ const graph_data = {
 },
 };
 export const captions = async (context, callbacks) => {
-if (context.caption) {
+if (MulmoStudioContextMethods.getCaption(context)) {
 try {
 MulmoStudioContextMethods.setSessionState(context, "caption", true);
 const graph = new GraphAI(graph_data, { ...vanillaAgents });
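
Caption language and extra CSS now come from `captionParams` (script-level values merged with per-beat overrides); the `styles` array is joined with `";\n"` and lands in the `${styles}` slot added to caption.html above. An illustrative fragment: the `lang` and `styles` field names are taken from the code above, but the values are made up:

```typescript
// Hypothetical captionParams, as consumed by
// mulmoCaptionParamsSchema.parse({ ...script.captionParams, ...beat.captionParams }).
const captionParams = {
  lang: "ja", // selects multiLingualTexts["ja"] when translations exist
  styles: ["font-size: 48px", "color: #ffcc00"], // injected into the .caption CSS rule
};
```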
package/lib/actions/images.d.ts CHANGED
@@ -8,8 +8,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
 imageRefs: Record<string, string>;
 }) => Promise<{
 imageParams: {
-model?: string | undefined;
 style?: string | undefined;
+model?: string | undefined;
 moderation?: string | undefined;
 images?: Record<string, {
 type: "image";
@@ -32,16 +32,18 @@ export declare const imagePreprocessAgent: (namedInputs: {
 imagePath: string | undefined;
 referenceImage: string | undefined;
 htmlPrompt?: undefined;
+htmlImageSystemPrompt?: undefined;
 } | {
 imagePath: string;
 htmlPrompt: string;
+htmlImageSystemPrompt: string[];
 } | {
 imagePath: string;
 images: string[];
 imageFromMovie: boolean;
 imageParams: {
-model?: string | undefined;
 style?: string | undefined;
+model?: string | undefined;
 moderation?: string | undefined;
 images?: Record<string, {
 type: "image";
@@ -62,11 +64,12 @@ export declare const imagePreprocessAgent: (namedInputs: {
 };
 movieFile: string | undefined;
 htmlPrompt?: undefined;
+htmlImageSystemPrompt?: undefined;
 } | {
 images: string[];
 imageParams: {
-model?: string | undefined;
 style?: string | undefined;
+model?: string | undefined;
 moderation?: string | undefined;
 images?: Record<string, {
 type: "image";
@@ -90,6 +93,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
 referenceImage: string;
 prompt: string;
 htmlPrompt?: undefined;
+htmlImageSystemPrompt?: undefined;
 }>;
 export declare const imagePluginAgent: (namedInputs: {
 context: MulmoStudioContext;
@@ -97,5 +101,5 @@ export declare const imagePluginAgent: (namedInputs: {
 index: number;
 }) => Promise<void>;
 export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
-export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
-export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
+export declare const generateBeatImage: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -1,21 +1,21 @@
 import dotenv from "dotenv";
 import fs from "fs";
-import { GraphAI, GraphAILogger } from "graphai";
-import { TaskManager } from "graphai/lib/task_manager.js";
+import { GraphAI, GraphAILogger, TaskManager } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
+import { anthropicAgent } from "@graphai/anthropic_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, mediaMockAgent } from "../agents/index.js";
+import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
 import { findImagePlugin } from "../utils/image_plugins/index.js";
-import { imagePrompt } from "../utils/prompt.js";
+import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
+import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
 import { defaultOpenAIImageModel } from "../utils/const.js";
 import { renderHTMLToImage } from "../utils/markdown.js";
 const vanillaAgents = agents.default ?? agents;
 dotenv.config();
-// const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
 import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
 const htmlStyle = (context, beat) => {
@@ -43,7 +43,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
 }
 if (beat.htmlPrompt) {
 const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
-return { imagePath, htmlPrompt };
+return { imagePath, htmlPrompt, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
 }
 // images for "edit_image"
 const images = (() => {
@@ -85,6 +85,7 @@ const beat_graph_data = {
 nodes: {
 context: {},
 imageAgentInfo: {},
+htmlImageAgentInfo: {},
 movieAgentInfo: {},
 imageRefs: {},
 beat: {},
@@ -113,25 +114,21 @@ const beat_graph_data = {
 htmlImageAgent: {
 if: ":preprocessor.htmlPrompt",
 defaultValue: {},
-agent: "openAIAgent",
+agent: ":htmlImageAgentInfo.agent",
+params: {
+mode: ":htmlImageAgentInfo.model",
+},
 inputs: {
 prompt: ":preprocessor.htmlPrompt",
-system: [
-"Based on the provided information, create a single slide HTML page using Tailwind CSS.",
-"If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
-"Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
-"Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
-"If data is provided, use it effectively to populate the slide.",
-],
+system: ":preprocessor.htmlImageSystemPrompt",
 },
 },
 htmlImageGenerator: {
 if: ":preprocessor.htmlPrompt",
 defaultValue: {},
 agent: htmlImageGeneratorAgent,
-// console: { before: true, after: true },
 inputs: {
-html: ":htmlImageAgent.text.codeBlock()",
+html: ":htmlImageAgent.text.codeBlockOrRaw()",
 canvasSize: ":context.presentationStyle.canvasSize",
 file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
 mulmoContext: ":context", // for fileCacheAgentFilter
@@ -213,6 +210,7 @@ const graph_data = {
 nodes: {
 context: {},
 imageAgentInfo: {},
+htmlImageAgentInfo: {},
 movieAgentInfo: {},
 outputStudioFilePath: {},
 imageRefs: {},
@@ -222,6 +220,7 @@ const graph_data = {
 rows: ":context.studio.script.beats",
 context: ":context",
 imageAgentInfo: ":imageAgentInfo",
+htmlImageAgentInfo: ":htmlImageAgentInfo",
 movieAgentInfo: ":movieAgentInfo",
 imageRefs: ":imageRefs",
 },
@@ -268,7 +267,6 @@ const graph_data = {
 },
 },
 writeOutput: {
-// console: { before: true },
 agent: "fileWriteAgent",
 inputs: {
 file: ":outputStudioFilePath",
@@ -291,7 +289,7 @@ const googleAuth = async () => {
 throw error;
 }
 };
-const graphOption = async (context) => {
+const graphOption = async (context, settings) => {
 const agentFilters = [
 {
 name: "fileCacheAgentFilter",
@@ -305,21 +303,22 @@ const graphOption = async (context) => {
 taskManager,
 };
 const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
+const config = settings2GraphAIConfig(settings);
 // We need to get google's auth token only if the google is the text2image provider.
 if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
+userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
 GraphAILogger.log("google was specified as text2image engine");
 const token = await googleAuth();
-options.config = {
-imageGoogleAgent: {
-projectId: process.env.GOOGLE_PROJECT_ID,
-token,
-},
-movieGoogleAgent: {
-projectId: process.env.GOOGLE_PROJECT_ID,
-token,
-},
+config["imageGoogleAgent"] = {
+projectId: process.env.GOOGLE_PROJECT_ID,
+token,
+};
+config["movieGoogleAgent"] = {
+projectId: process.env.GOOGLE_PROJECT_ID,
+token,
 };
 }
+options.config = config;
 return options;
 };
 // TODO: unit test
@@ -370,13 +369,28 @@ const prepareGenerateImages = async (context) => {
 const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
 mkdir(imageProjectDirPath);
 const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, context.dryRun);
+const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
 const imageRefs = await getImageRefs(context);
+// Determine movie agent based on provider
+const getMovieAgent = () => {
+if (context.dryRun)
+return "mediaMockAgent";
+const provider = context.presentationStyle.movieParams?.provider ?? "google";
+switch (provider) {
+case "replicate":
+return "movieReplicateAgent";
+case "google":
+default:
+return "movieGoogleAgent";
+}
+};
 GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
 const injections = {
 context,
 imageAgentInfo,
+htmlImageAgentInfo,
 movieAgentInfo: {
-agent: context.dryRun ? "mediaMockAgent" : "movieGoogleAgent",
+agent: getMovieAgent(),
 },
 outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
 imageRefs,
@@ -384,6 +398,9 @@ const prepareGenerateImages = async (context) => {
 return injections;
 };
 const getConcurrency = (context) => {
+if (context.presentationStyle.movieParams?.provider === "replicate") {
+return 4;
+}
 const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
 if (imageAgentInfo.provider === "openai") {
 // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -393,10 +410,20 @@ const getConcurrency = (context) => {
 }
 return 4;
 };
-const generateImages = async (context, callbacks) => {
-const options = await graphOption(context);
+const generateImages = async (context, settings, callbacks) => {
+const options = await graphOption(context, settings);
 const injections = await prepareGenerateImages(context);
-const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent, openAIAgent }, options);
+const graph = new GraphAI(graph_data, {
+...vanillaAgents,
+imageGoogleAgent,
+movieGoogleAgent,
+movieReplicateAgent,
+imageOpenaiAgent,
+mediaMockAgent,
+fileWriteAgent,
+openAIAgent,
+anthropicAgent,
+}, options);
 Object.keys(injections).forEach((key) => {
 graph.injectValue(key, injections[key]);
 });
@@ -408,10 +435,10 @@ const generateImages = async (context, callbacks) => {
 const res = await graph.run();
 return res.mergeResult;
 };
-export const images = async (context, callbacks) => {
+export const images = async (context, settings, callbacks) => {
 try {
 MulmoStudioContextMethods.setSessionState(context, "image", true);
-const newContext = await generateImages(context, callbacks);
+const newContext = await generateImages(context, settings, callbacks);
 MulmoStudioContextMethods.setSessionState(context, "image", false);
 return newContext;
 }
@@ -420,10 +447,20 @@ export const images = async (context, callbacks) => {
 throw error;
 }
 };
-export const generateBeatImage = async (index, context, callbacks) => {
-const options = await graphOption(context);
+export const generateBeatImage = async (index, context, settings, callbacks) => {
+const options = await graphOption(context, settings);
 const injections = await prepareGenerateImages(context);
-const graph = new GraphAI(beat_graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, mediaMockAgent, fileWriteAgent, openAIAgent }, options);
+const graph = new GraphAI(beat_graph_data, {
+...vanillaAgents,
+imageGoogleAgent,
+movieGoogleAgent,
+movieReplicateAgent,
+imageOpenaiAgent,
+mediaMockAgent,
+fileWriteAgent,
+openAIAgent,
+anthropicAgent,
+}, options);
 Object.keys(injections).forEach((key) => {
 if ("outputStudioFilePath" !== key) {
 graph.injectValue(key, injections[key]);
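
Net effect of the images.js changes: the movie agent is chosen from `movieParams.provider` (`"replicate"` selects the new `movieReplicateAgent`; `"google"` remains the default), and the HTML-slide LLM call is routed through `htmlImageAgentInfo` (which can now point at `anthropicAgent`) instead of a hard-coded `openAIAgent`. An illustrative fragment driving `getMovieAgent()`'s switch; the `provider` value is confirmed by the code above, while the model id is a hypothetical Replicate `owner/name` string matching the template type in the new agent's .d.ts below:

```typescript
// Hypothetical presentation-style fragment selecting the Replicate movie backend.
const movieParams = {
  provider: "replicate", // getMovieAgent() maps this to "movieReplicateAgent"
  model: "bytedance/seedance-1-lite", // assumed "owner/name" model id on Replicate
};
```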
package/lib/actions/movie.d.ts CHANGED
@@ -1,5 +1,5 @@
-import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
-export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
+import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType, MulmoFillOption } from "../types/index.js";
+export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption) => {
 videoId: string;
 videoPart: string;
 };
package/lib/actions/movie.js CHANGED
@@ -1,12 +1,12 @@
 import { GraphAILogger, assert } from "graphai";
-import { mulmoTransitionSchema } from "../types/index.js";
+import { mulmoTransitionSchema, mulmoFillOptionSchema } from "../types/index.js";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
-export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
+export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo, fillOption) => {
 const videoId = `v${inputIndex}`;
 const videoFilters = [];
 // Handle different media types
@@ -19,9 +19,19 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
 videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
 }
 // Common filters for all media types
-videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
-// In case of the aspect ratio mismatch, we fill the extra space with black color.
-`pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
+videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS");
+// Apply scaling based on fill option
+if (fillOption.style === "aspectFill") {
+// For aspect fill: scale to fill the canvas completely, cropping if necessary
+videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=increase`, `crop=${canvasInfo.width}:${canvasInfo.height}`);
+}
+else {
+// For aspect fit: scale to fit within canvas, padding if necessary
+videoFilters.push(`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+// In case of the aspect ratio mismatch, we fill the extra space with black color.
+`pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`);
+}
+videoFilters.push("setsar=1", "format=yuv420p");
 return {
 videoId,
 videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
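
For a 1280x720 canvas, the two branches above emit filter chains like these (derived directly from the code; the shared `trim`/`fps`/`setpts` prefix and `setsar`/`format` suffix are omitted):

```typescript
// aspectFill: upscale until the canvas is fully covered, then crop the overflow.
const aspectFill = "scale=w=1280:h=720:force_original_aspect_ratio=increase,crop=1280:720";
// default (aspect fit): downscale to fit inside the canvas, then pad the leftover area with black.
const aspectFit =
  "scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black";
```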
@@ -59,7 +69,8 @@ const getOutputOption = (audioId, videoId) => {
 "-b:a 128k", // Audio bitrate
 ];
 };
-const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
+const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
+const caption = MulmoStudioContextMethods.getCaption(context);
 const start = performance.now();
 const ffmpegContext = FfmpegContextInit();
 const missingIndex = context.studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
@@ -95,7 +106,12 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context, caption) => {
 return 0;
 })();
 const duration = studioBeat.duration + extraPadding;
-const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
+// Get fillOption from merged imageParams (global + beat-specific)
+const globalFillOption = context.presentationStyle.movieParams?.fillOption;
+const beatFillOption = beat.movieParams?.fillOption;
+const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
+const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
+const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption);
 ffmpegContext.filterComplex.push(videoPart);
 if (caption && studioBeat.captionFile) {
 const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
@@ -193,12 +209,11 @@ export const movieFilePath = (context) => {
 export const movie = async (context) => {
 MulmoStudioContextMethods.setSessionState(context, "video", true);
 try {
-const caption = MulmoStudioContextMethods.getCaption(context);
 const fileName = MulmoStudioContextMethods.getFileName(context);
 const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
 const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
 const outputVideoPath = movieFilePath(context);
-if (await createVideo(audioArtifactFilePath, outputVideoPath, context, caption)) {
+if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
 writingMessage(outputVideoPath);
 }
 }
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -1,4 +1,4 @@
-import { assert } from "graphai";
+import { assert, GraphAILogger } from "graphai";
 import { silent60secPath } from "../utils/file.js";
 import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 const getMovieDulation = async (beat) => {
@@ -77,7 +77,8 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
 // Yes, the current beat has spilled over audio.
 const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-if (beatsTotalDuration > audioDuration) {
+if (beatsTotalDuration > audioDuration + 0.01) {
+// 0.01 is a tolerance to avoid floating point precision issues
 group.reduce((remaining, idx, iGroup) => {
 if (remaining >= groupBeatsDurations[iGroup]) {
 return remaining - groupBeatsDurations[iGroup];
@@ -88,7 +89,9 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 }
 else {
 // Last beat gets the rest of the audio.
-groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+if (audioDuration > beatsTotalDuration) {
+groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+}
 }
 beatDurations.push(...groupBeatsDurations);
 }
@@ -98,7 +101,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 // padding is the amount of audio padding specified in the script.
 const padding = getPadding(context, beat, index);
 // totalPadding is the amount of audio padding to be added to the audio file.
-const totalPadding = getTotalPadding(padding, movieDuration, audioDuration, beat.duration);
+const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
 const beatDuration = audioDuration + totalPadding;
 beatDurations.push(beatDuration);
 if (totalPadding > 0) {
@@ -124,7 +127,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 // We cannot reuse longSilentId. We need to explicitly split it for each beat.
 const silentIds = mediaDurations.filter((md) => md.silenceDuration > 0).map((_, index) => `[ls_${index}]`);
 if (silentIds.length > 0) {
-const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath());
+const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
 ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
 }
 const inputIds = [];
@@ -142,6 +145,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
 }
 });
 assert(silentIds.length === 0, "silentIds.length !== 0");
+GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
 // Finally, combine all audio files.
 ffmpegContext.filterComplex.push(`${inputIds.join("")}concat=n=${inputIds.length}:v=0:a=1[aout]`);
 await FfmpegContextGenerateOutput(ffmpegContext, combinedFileName, ["-map", "[aout]"]);
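
The new `+ 0.01` tolerance and the `Math.round(... * 100) / 100` rounding both guard against binary floating-point drift when summing beat durations:

```typescript
// Why the tolerance matters: summed float durations rarely compare exactly.
const total = [0.1, 0.2, 0.3].reduce((a, b) => a + b, 0); // 0.6000000000000001
console.log(total > 0.6);        // true  -- would falsely enter the spill-over branch
console.log(total > 0.6 + 0.01); // false -- the tolerance absorbs the drift
```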
package/lib/agents/image_openai_agent.d.ts CHANGED
@@ -13,6 +13,8 @@ export declare const imageOpenaiAgent: AgentFunction<{
 }, {
 prompt: string;
 images: string[] | null | undefined;
+}, {
+apiKey?: string;
 }>;
 declare const imageOpenaiAgentInfo: AgentFunctionInfo;
 export default imageOpenaiAgentInfo;
package/lib/agents/image_openai_agent.js CHANGED
@@ -4,9 +4,10 @@ import { GraphAILogger } from "graphai";
 import OpenAI, { toFile } from "openai";
 import { defaultOpenAIImageModel } from "../utils/const.js";
 // https://platform.openai.com/docs/guides/image-generation
-export const imageOpenaiAgent = async ({ namedInputs, params }) => {
+export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
 const { prompt, images } = namedInputs;
-const { apiKey, moderation, canvasSize } = params;
+const { moderation, canvasSize } = params;
+const { apiKey } = { ...config };
 const model = params.model ?? defaultOpenAIImageModel;
 const openai = new OpenAI({ apiKey });
 const size = (() => {
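
With `apiKey` moved from `params` to `config`, secrets flow through the GraphAI constructor's per-agent `config` option (the same mechanism `graphOption` in images.js now uses for the Google agents) rather than being baked into the graph data. A minimal sketch of the wiring; the graph content here is a placeholder:

```typescript
import { GraphAI } from "graphai";
import * as agents from "@graphai/vanilla";

const vanillaAgents = agents.default ?? agents;
// Per-agent config: GraphAI hands config.imageOpenaiAgent to imageOpenaiAgent at run time.
const config = { imageOpenaiAgent: { apiKey: process.env.OPENAI_API_KEY } };
const graph = new GraphAI({ version: 0.5, nodes: { echo: { value: "ok" } } }, vanillaAgents, { config });
```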
package/lib/agents/index.d.ts CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -13,4 +14,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js CHANGED
@@ -4,6 +4,7 @@ import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
 import tavilySearchAgent from "./tavily_agent.js";
 import movieGoogleAgent from "./movie_google_agent.js";
+import movieReplicateAgent from "./movie_replicate_agent.js";
 import mediaMockAgent from "./media_mock_agent.js";
 import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
@@ -14,4 +15,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/movie_replicate_agent.d.ts ADDED
@@ -0,0 +1,23 @@
+import type { AgentFunction, AgentFunctionInfo } from "graphai";
+export declare const getAspectRatio: (canvasSize: {
+width: number;
+height: number;
+}) => string;
+export type MovieReplicateConfig = {
+apiKey?: string;
+};
+export declare const movieReplicateAgent: AgentFunction<{
+model: `${string}/${string}` | undefined;
+canvasSize: {
+width: number;
+height: number;
+};
+duration?: number;
+}, {
+buffer: Buffer;
+}, {
+prompt: string;
+imagePath?: string;
+}, MovieReplicateConfig>;
+declare const movieReplicateAgentInfo: AgentFunctionInfo;
+export default movieReplicateAgentInfo;
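
Going by this declaration, the new Replicate-backed movie agent takes a text `prompt` (plus an optional `imagePath` for image-to-video) as named inputs, an `owner/name` model id with canvas size and duration as params, the API key via config, and resolves to a raw video `buffer`. A hedged invocation sketch; in normal use GraphAI supplies these arguments itself, and the model id and prompt here are illustrative:

```typescript
import { movieReplicateAgent } from "./movie_replicate_agent.js";

const { buffer } = await movieReplicateAgent({
  namedInputs: { prompt: "A drone shot over a foggy coastline at sunrise" },
  params: {
    model: "bytedance/seedance-1-lite", // assumed "owner/name" Replicate model id
    canvasSize: { width: 1280, height: 720 }, // getAspectRatio presumably maps this to "16:9"
    duration: 5,
  },
  config: { apiKey: process.env.REPLICATE_API_TOKEN },
  // direct calls omit the rest of GraphAI's AgentFunctionContext
});
// `buffer` should hold the generated video bytes, ready to be cached to the beat's movie path.
```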