mulmocast 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/assets/templates/characters.json +16 -0
  2. package/assets/templates/html.json +6 -0
  3. package/lib/actions/audio.js +13 -19
  4. package/lib/actions/image_agents.d.ts +145 -0
  5. package/lib/actions/image_agents.js +59 -0
  6. package/lib/actions/image_references.d.ts +9 -0
  7. package/lib/actions/image_references.js +79 -0
  8. package/lib/actions/images.d.ts +17 -109
  9. package/lib/actions/images.js +83 -188
  10. package/lib/actions/index.d.ts +2 -0
  11. package/lib/actions/index.js +2 -0
  12. package/lib/actions/movie.js +3 -1
  13. package/lib/actions/pdf.js +5 -2
  14. package/lib/agents/image_google_agent.d.ts +2 -15
  15. package/lib/agents/image_google_agent.js +5 -5
  16. package/lib/agents/image_openai_agent.d.ts +2 -17
  17. package/lib/agents/image_openai_agent.js +9 -9
  18. package/lib/agents/movie_google_agent.d.ts +2 -17
  19. package/lib/agents/movie_google_agent.js +7 -7
  20. package/lib/agents/movie_replicate_agent.d.ts +2 -16
  21. package/lib/agents/movie_replicate_agent.js +4 -4
  22. package/lib/agents/tts_google_agent.d.ts +9 -1
  23. package/lib/agents/tts_google_agent.js +2 -2
  24. package/lib/agents/tts_nijivoice_agent.js +1 -1
  25. package/lib/agents/tts_openai_agent.d.ts +13 -1
  26. package/lib/agents/tts_openai_agent.js +2 -2
  27. package/lib/cli/helpers.js +7 -7
  28. package/lib/index.d.ts +1 -0
  29. package/lib/index.js +1 -0
  30. package/lib/methods/index.d.ts +1 -0
  31. package/lib/methods/index.js +1 -0
  32. package/lib/methods/mulmo_beat.d.ts +6 -0
  33. package/lib/methods/mulmo_beat.js +21 -0
  34. package/lib/methods/mulmo_presentation_style.d.ts +3 -1
  35. package/lib/methods/mulmo_presentation_style.js +31 -7
  36. package/lib/methods/mulmo_studio_context.js +3 -0
  37. package/lib/tools/story_to_script.js +2 -2
  38. package/lib/types/agent.d.ts +55 -0
  39. package/lib/types/agent.js +3 -0
  40. package/lib/types/schema.d.ts +560 -296
  41. package/lib/types/schema.js +19 -10
  42. package/lib/types/type.d.ts +3 -2
  43. package/lib/utils/const.d.ts +0 -1
  44. package/lib/utils/const.js +0 -1
  45. package/lib/utils/context.d.ts +24 -13
  46. package/lib/utils/context.js +1 -0
  47. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  48. package/lib/utils/ffmpeg_utils.js +1 -1
  49. package/lib/utils/file.js +4 -4
  50. package/lib/utils/filters.js +3 -4
  51. package/lib/utils/markdown.js +1 -1
  52. package/lib/utils/preprocess.d.ts +15 -8
  53. package/lib/utils/provider2agent.d.ts +72 -0
  54. package/lib/utils/provider2agent.js +81 -0
  55. package/lib/utils/string.js +5 -5
  56. package/lib/utils/utils.d.ts +13 -11
  57. package/lib/utils/utils.js +56 -62
  58. package/package.json +7 -6
  59. package/scripts/templates/html.json +42 -0
  60. package/scripts/templates/image_refs.json +35 -0
@@ -1,86 +1,38 @@
  import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger, TaskManager } from "graphai";
- import * as agents from "@graphai/vanilla";
+ import { GoogleAuth } from "google-auth-library";
+ import * as vanilla from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { anthropicAgent } from "@graphai/anthropic_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
- import { fileCacheAgentFilter } from "../utils/filters.js";
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
- import { findImagePlugin } from "../utils/image_plugins/index.js";
- import { userAssert, settings2GraphAIConfig, getExtention } from "../utils/utils.js";
- import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
- import { defaultOpenAIImageModel } from "../utils/const.js";
- import { renderHTMLToImage } from "../utils/markdown.js";
- const vanillaAgents = agents.default ?? agents;
- dotenv.config();
- import { GoogleAuth } from "google-auth-library";
+ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
+ import { fileCacheAgentFilter } from "../utils/filters.js";
+ import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
  import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
- const htmlStyle = (context, beat) => {
- return {
- canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
- textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
- };
+ import { getImageRefs } from "./image_references.js";
+ import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
+ const vanillaAgents = vanilla.default ?? vanilla;
+ const imageAgents = {
+ imageGoogleAgent,
+ imageOpenaiAgent,
  };
- export const imagePreprocessAgent = async (namedInputs) => {
- const { context, beat, index, imageRefs } = namedInputs;
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
- // const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
- const imagePath = getBeatPngImagePath(context, index);
- const returnValue = {
- imageParams: imageAgentInfo.imageParams,
- movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
- };
- if (beat.image) {
- const plugin = findImagePlugin(beat?.image?.type);
- if (!plugin) {
- throw new Error(`invalid beat image type: ${beat.image}`);
- }
- const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
- // undefined prompt indicates that image generation is not needed
- return { imagePath: path, referenceImage: path, ...returnValue };
- }
- if (beat.htmlPrompt) {
- const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
- const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
- return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
- }
- // images for "edit_image"
- const images = (() => {
- const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
- const sources = imageNames.map((name) => imageRefs[name]);
- return sources.filter((source) => source !== undefined);
- })();
- if (beat.moviePrompt && !beat.imagePrompt) {
- return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
- }
- const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
- return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
- };
- export const imagePluginAgent = async (namedInputs) => {
- const { context, beat, index } = namedInputs;
- const imagePath = getBeatPngImagePath(context, index);
- const plugin = findImagePlugin(beat?.image?.type);
- if (!plugin) {
- throw new Error(`invalid beat image type: ${beat.image}`);
- }
- try {
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
- const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
- await plugin.process(processorParams);
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
- }
- catch (error) {
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
- throw error;
- }
+ const movieAgents = {
+ movieGoogleAgent,
+ movieReplicateAgent,
  };
- const htmlImageGeneratorAgent = async (namedInputs) => {
- const { file, canvasSize, htmlText } = namedInputs;
- await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
+ const defaultAgents = {
+ ...vanillaAgents,
+ ...imageAgents,
+ ...movieAgents,
+ mediaMockAgent,
+ fileWriteAgent,
+ openAIAgent,
+ anthropicAgent,
  };
+ dotenv.config();
  const beat_graph_data = {
  version: 0.5,
  concurrency: 4,
@@ -91,6 +43,8 @@ const beat_graph_data = {
  imageRefs: {},
  beat: {},
  __mapIndex: {},
+ forceMovie: { value: false },
+ forceImage: { value: false },
  preprocessor: {
  agent: imagePreprocessAgent,
  inputs: {
@@ -108,7 +62,7 @@ const beat_graph_data = {
  context: ":context",
  beat: ":beat",
  index: ":__mapIndex",
- onComplete: ":preprocessor",
+ onComplete: [":preprocessor"],
  },
  },
  htmlImageAgent: {
@@ -122,10 +76,13 @@ const beat_graph_data = {
  model: ":htmlImageAgentInfo.model",
  max_tokens: ":htmlImageAgentInfo.max_tokens",
  },
- file: ":preprocessor.htmlPath", // only for fileCacheAgentFilter
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for fileCacheAgentFilter
- sessionType: "html", // for fileCacheAgentFilter
+ cache: {
+ force: [":context.force", ":forceImage"],
+ file: ":preprocessor.htmlPath",
+ index: ":__mapIndex",
+ mulmoContext: ":context",
+ sessionType: "html",
+ },
  },
  },
  htmlReader: {
@@ -135,7 +92,7 @@ const beat_graph_data = {
  return { html };
  },
  inputs: {
- onComplete: ":htmlImageAgent", // to wait for htmlImageAgent to finish
+ onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
  htmlPath: ":preprocessor.htmlPath",
  },
  output: {
@@ -150,10 +107,7 @@ const beat_graph_data = {
  inputs: {
  htmlText: ":htmlReader.htmlText",
  canvasSize: ":context.presentationStyle.canvasSize",
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for fileCacheAgentFilter
- sessionType: "image", // for fileCacheAgentFilter
+ file: ":preprocessor.imagePath",
  },
  },
  imageGenerator: {
@@ -162,12 +116,14 @@ const beat_graph_data = {
  retry: 2,
  inputs: {
  prompt: ":preprocessor.prompt",
- images: ":preprocessor.images",
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
- force: ":context.force", // only for fileCacheAgentFilter
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for fileCacheAgentFilter
- sessionType: "image", // for fileCacheAgentFilter
+ referenceImages: ":preprocessor.referenceImages",
+ cache: {
+ force: [":context.force", ":forceImage"],
+ file: ":preprocessor.imagePath",
+ index: ":__mapIndex",
+ mulmoContext: ":context",
+ sessionType: "image",
+ },
  params: {
  model: ":preprocessor.imageParams.model",
  moderation: ":preprocessor.imageParams.moderation",
@@ -182,14 +138,16 @@ const beat_graph_data = {
  inputs: {
  onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
  prompt: ":beat.moviePrompt",
- imagePath: ":preprocessor.referenceImage",
- file: ":preprocessor.movieFile",
- studio: ":context.studio", // for cache
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for cache
- sessionType: "movie", // for cache
+ imagePath: ":preprocessor.referenceImageForMovie",
+ cache: {
+ force: [":context.force", ":forceMovie"],
+ file: ":preprocessor.movieFile",
+ index: ":__mapIndex",
+ sessionType: "movie",
+ mulmoContext: ":context",
+ },
  params: {
- model: ":context.presentationStyle.movieParams.model",
+ model: ":preprocessor.movieParams.model",
  duration: ":beat.duration",
  canvasSize: ":context.presentationStyle.canvasSize",
  },
@@ -199,15 +157,14 @@ const beat_graph_data = {
  imageFromMovie: {
  if: ":preprocessor.imageFromMovie",
  agent: async (namedInputs) => {
- await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
- return { generatedImage: true };
+ return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
  },
  inputs: {
- onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+ onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
  imageFile: ":preprocessor.imagePath",
  movieFile: ":preprocessor.movieFile",
  },
- defaultValue: { generatedImage: false },
+ defaultValue: {},
  },
  output: {
  agent: "copyAgent",
@@ -307,21 +264,19 @@ const googleAuth = async () => {
  throw error;
  }
  };
- const graphOption = async (context, settings) => {
- const agentFilters = [
- {
- name: "fileCacheAgentFilter",
- agent: fileCacheAgentFilter,
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator", "htmlImageAgent"],
- },
- ];
- const taskManager = new TaskManager(getConcurrency(context));
+ export const graphOption = async (context, settings) => {
  const options = {
- agentFilters,
- taskManager,
+ agentFilters: [
+ {
+ name: "fileCacheAgentFilter",
+ agent: fileCacheAgentFilter,
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
+ },
+ ],
+ taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
  };
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
- const config = settings2GraphAIConfig(settings);
+ const config = settings2GraphAIConfig(settings, process.env);
  // We need to get google's auth token only if the google is the text2image provider.
  if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
  userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
@@ -339,32 +294,6 @@ const graphOption = async (context, settings) => {
  options.config = config;
  return options;
  };
- // TODO: unit test
- export const getImageRefs = async (context) => {
- const imageRefs = {};
- const images = context.presentationStyle.imageParams?.images;
- if (images) {
- await Promise.all(Object.keys(images).map(async (key) => {
- const image = images[key];
- if (image.source.kind === "path") {
- imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
- }
- else if (image.source.kind === "url") {
- const response = await fetch(image.source.url);
- if (!response.ok) {
- throw new Error(`Failed to download image: ${image.source.url}`);
- }
- const buffer = Buffer.from(await response.arrayBuffer());
- // Detect file extension from Content-Type header or URL
- const extension = getExtention(response.headers.get("content-type"), image.source.url);
- const imagePath = getReferenceImagePath(context, key, extension);
- await fs.promises.writeFile(imagePath, buffer);
- imageRefs[key] = imagePath;
- }
- }));
- }
- return imageRefs;
- };
  const prepareGenerateImages = async (context) => {
  const fileName = MulmoStudioContextMethods.getFileName(context);
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -373,56 +302,26 @@ const prepareGenerateImages = async (context) => {
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
  const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
  const imageRefs = await getImageRefs(context);
- // Determine movie agent based on provider
- const getMovieAgent = () => {
- const provider = context.presentationStyle.movieParams?.provider ?? "google";
- switch (provider) {
- case "replicate":
- return "movieReplicateAgent";
- case "google":
- default:
- return "movieGoogleAgent";
- }
- };
  GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
  const injections = {
  context,
  htmlImageAgentInfo,
  movieAgentInfo: {
- agent: getMovieAgent(),
+ agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
  },
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
  imageRefs,
  };
  return injections;
  };
- const getConcurrency = (context) => {
- if (context.presentationStyle.movieParams?.provider === "replicate") {
- return 4;
- }
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
- if (imageAgentInfo.imageParams.provider === "openai") {
- // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
- // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
- // gpt-image-1:3,000,000 TPM、150 images per minute
- return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
- }
- return 4;
- };
- const generateImages = async (context, settings, callbacks) => {
- const options = await graphOption(context, settings);
+ const generateImages = async (context, settings, callbacks, options) => {
+ const optionImageAgents = options?.imageAgents ?? {};
  const injections = await prepareGenerateImages(context);
- const graph = new GraphAI(graph_data, {
- ...vanillaAgents,
- imageGoogleAgent,
- movieGoogleAgent,
- movieReplicateAgent,
- imageOpenaiAgent,
- mediaMockAgent,
- fileWriteAgent,
- openAIAgent,
- anthropicAgent,
- }, options);
+ const graphaiAgent = {
+ ...defaultAgents,
+ ...optionImageAgents,
+ };
+ const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
@@ -434,10 +333,12 @@ const generateImages = async (context, settings, callbacks) => {
  const res = await graph.run();
  return res.mergeResult;
  };
- export const images = async (context, settings, callbacks) => {
+ // public api
+ export const images = async (context, args) => {
+ const { settings, callbacks, options } = args ?? {};
  try {
  MulmoStudioContextMethods.setSessionState(context, "image", true);
- const newContext = await generateImages(context, settings, callbacks);
+ const newContext = await generateImages(context, settings, callbacks, options);
  MulmoStudioContextMethods.setSessionState(context, "image", false);
  return newContext;
  }
@@ -446,20 +347,12 @@ export const images = async (context, settings, callbacks) => {
  throw error;
  }
  };
- export const generateBeatImage = async (index, context, settings, callbacks) => {
+ // public api
+ export const generateBeatImage = async (inputs) => {
+ const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
  const options = await graphOption(context, settings);
  const injections = await prepareGenerateImages(context);
- const graph = new GraphAI(beat_graph_data, {
- ...vanillaAgents,
- imageGoogleAgent,
- movieGoogleAgent,
- movieReplicateAgent,
- imageOpenaiAgent,
- mediaMockAgent,
- fileWriteAgent,
- openAIAgent,
- anthropicAgent,
- }, options);
+ const graph = new GraphAI(beat_graph_data, defaultAgents, options);
  Object.keys(injections).forEach((key) => {
  if ("outputStudioFilePath" !== key) {
  graph.injectValue(key, injections[key]);
@@ -467,6 +360,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
  });
  graph.injectValue("__mapIndex", index);
  graph.injectValue("beat", context.studio.script.beats[index]);
+ graph.injectValue("forceMovie", forceMovie ?? false);
+ graph.injectValue("forceImage", forceImage ?? false);
  if (callbacks) {
  callbacks.forEach((callback) => {
  graph.registerCallback(callback);
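
The two public entry points above now take a single object argument instead of positional parameters. A minimal sketch of how a caller adapts, assuming the functions are re-exported from the package root; the import path, prompt values, and index are illustrative, only the argument shapes come from this diff.

import { images, generateBeatImage } from "mulmocast"; // assumed export path

const regenerate = async (context: any) => {
  // 0.1.2: images(context, settings, callbacks)
  // 0.1.4: images(context, { settings, callbacks, options }); every field is optional
  await images(context, {
    callbacks: [],
    options: { imageAgents: {} }, // optional per-call agent overrides merged over defaultAgents
  });

  // 0.1.2: generateBeatImage(index, context, settings, callbacks)
  // 0.1.4: one inputs object; forceImage/forceMovie are injected into the beat graph as cache overrides
  await generateBeatImage({ index: 0, context, forceImage: true, forceMovie: false });
};
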
@@ -1,5 +1,7 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./image_references.js";
+ export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
@@ -1,5 +1,7 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./image_references.js";
+ export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  // Concatenate the trimmed images
  const concatVideoId = "concat_video";
  const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
- ffmpegContext.filterComplex.push(`${videoIds.map((id) => `[${id}]`).join("")}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`);
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
+ const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
+ ffmpegContext.filterComplex.push(filter);
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
  const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
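
The refactor above only splits the concat filter construction into named intermediates; the resulting ffmpeg filter string is unchanged. A small sketch of what it produces, with made-up per-beat stream labels (only the concat syntax comes from the code above):

const videoIds = ["v0", "v1", "v2"]; // hypothetical per-beat video stream labels
const concatVideoId = "concat_video";
const inputs = videoIds.map((id) => `[${id}]`).join("");
const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
// filter === "[v0][v1][v2]concat=n=3:v=1:a=0[concat_video]"
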
@@ -1,6 +1,7 @@
  import fs from "fs";
  import path from "path";
  import puppeteer from "puppeteer";
+ import { GraphAILogger } from "graphai";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { localizedText, isHttp } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
  const mimeType = ext === "jpg" ? "jpeg" : ext;
  return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
  }
- catch (__error) {
+ catch (error) {
+ GraphAILogger.info("loadImage failed", error);
  const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
  return `data:image/png;base64,${placeholderData.toString("base64")}`;
  }
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
  const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
  const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
- const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
+ const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
+ const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
  const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
  const template = getHTMLFile(`pdf_${pdfMode}`);
  const baseTemplateData = {
@@ -1,18 +1,5 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export type ImageGoogleConfig = {
- projectId?: string;
- token?: string;
- };
- export declare const imageGoogleAgent: AgentFunction<{
- model: string;
- canvasSize: {
- width: number;
- height: number;
- };
- }, {
- buffer: Buffer;
- }, {
- prompt: string;
- }, ImageGoogleConfig>;
+ import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
+ export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
  declare const imageGoogleAgentInfo: AgentFunctionInfo;
  export default imageGoogleAgentInfo;
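
The inline declarations removed above (and in the other agent .d.ts hunks below) are consolidated into the new lib/types/agent.d.ts (+55 lines in the file list). A sketch of what those shared types plausibly contain, reconstructed from the removed inline declarations; the type names match the new imports, but the exact field lists are inferred, not taken from the new file.

// hypothetical reconstruction of part of lib/types/agent.d.ts
export type AgentBufferResult = {
  buffer: Buffer;
};
export type GoogleImageAgentConfig = {
  projectId?: string;
  token?: string;
};
export type ImageAgentParams = {
  model: string;
  canvasSize: { width: number; height: number };
};
export type ImageAgentInputs = {
  prompt: string;
};
export type MovieAgentInputs = {
  prompt: string;
  imagePath?: string;
};
export type GoogleMovieAgentParams = ImageAgentParams & {
  duration?: number;
};
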
@@ -1,5 +1,6 @@
  import { GraphAILogger } from "graphai";
  import { getAspectRatio } from "./movie_google_agent.js";
+ import { provider2ImageAgent } from "../utils/provider2agent.js";
  async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
  try {
@@ -7,12 +8,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const payload = {
  instances: [
  {
- prompt: prompt,
+ prompt,
  },
  ],
  parameters: {
  sampleCount: 1,
- aspectRatio: aspectRatio,
+ aspectRatio,
  safetySetting: "block_only_high",
  },
  };
@@ -51,11 +52,10 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  throw error;
  }
  }
- export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
+ export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
  const { prompt } = namedInputs;
  const aspectRatio = getAspectRatio(params.canvasSize);
- const model = params.model ?? "imagen-3.0-fast-generate-001";
- //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
+ const model = params.model ?? provider2ImageAgent["google"].defaultModel;
  const projectId = config?.projectId;
  const token = config?.token;
  try {
@@ -1,20 +1,5 @@
  import { AgentFunction, AgentFunctionInfo } from "graphai";
- type OpenAIModeration = "low" | "auto";
- export declare const imageOpenaiAgent: AgentFunction<{
- apiKey: string;
- model: string;
- moderation: OpenAIModeration | null | undefined;
- canvasSize: {
- width: number;
- height: number;
- };
- }, {
- buffer: Buffer;
- }, {
- prompt: string;
- images: string[] | null | undefined;
- }, {
- apiKey?: string;
- }>;
+ import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
+ export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
  export default imageOpenaiAgentInfo;
@@ -2,14 +2,14 @@ import fs from "fs";
  import path from "path";
  import { GraphAILogger } from "graphai";
  import OpenAI, { toFile } from "openai";
- import { defaultOpenAIImageModel } from "../utils/const.js";
+ import { provider2ImageAgent } from "../utils/provider2agent.js";
  // https://platform.openai.com/docs/guides/image-generation
- export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
- const { prompt, images } = namedInputs;
+ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
+ const { prompt, referenceImages } = namedInputs;
  const { moderation, canvasSize } = params;
- const { apiKey } = { ...config };
- const model = params.model ?? defaultOpenAIImageModel;
- const openai = new OpenAI({ apiKey });
+ const { apiKey, baseURL } = { ...config };
+ const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
+ const openai = new OpenAI({ apiKey, baseURL });
  const size = (() => {
  if (model === "gpt-image-1") {
  if (canvasSize.width > canvasSize.height) {
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
  const response = await (async () => {
  try {
  const targetSize = imageOptions.size;
- if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
- const imagelist = await Promise.all((images ?? []).map(async (file) => {
+ if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
+ const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
  const ext = path.extname(file).toLowerCase();
  const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
  return await toFile(fs.createReadStream(file), null, { type });
  }));
- return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
+ return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
  }
  else {
  return await openai.images.generate(imageOptions);
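
In the hunks above, the namedInputs field `images` is renamed to `referenceImages`, and the config gains an optional baseURL. A minimal sketch of calling the agent implementation directly with the new field names (inside mulmocast this runs as a GraphAI node); the import path, prompt, and file path are made up for illustration, only the field names come from this diff.

import { imageOpenaiAgent } from "mulmocast"; // assumed export path

const result = await imageOpenaiAgent({
  namedInputs: {
    prompt: "a watercolor lighthouse at dusk",
    referenceImages: ["./output/images/style_ref.png"], // was `images` in 0.1.2
  },
  params: { model: "gpt-image-1", moderation: "low", canvasSize: { width: 1536, height: 1024 } },
  config: { apiKey: process.env.OPENAI_API_KEY }, // baseURL may also be supplied now
});
// result.buffer holds the generated image bytes (AgentBufferResult)
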
@@ -1,24 +1,9 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export type MovieGoogleConfig = {
- projectId?: string;
- token?: string;
- };
+ import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
  export declare const getAspectRatio: (canvasSize: {
  width: number;
  height: number;
  }) => string;
- export declare const movieGoogleAgent: AgentFunction<{
- model: string;
- canvasSize: {
- width: number;
- height: number;
- };
- duration?: number;
- }, {
- buffer: Buffer;
- }, {
- prompt: string;
- imagePath?: string;
- }, MovieGoogleConfig>;
+ export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
  declare const movieGoogleAgentInfo: AgentFunctionInfo;