mulmocast 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/assets/templates/characters.json +16 -0
  2. package/assets/templates/html.json +6 -0
  3. package/lib/actions/audio.js +8 -6
  4. package/lib/actions/image_agents.d.ts +121 -0
  5. package/lib/actions/image_agents.js +56 -0
  6. package/lib/actions/image_references.d.ts +9 -0
  7. package/lib/actions/image_references.js +79 -0
  8. package/lib/actions/images.d.ts +9 -105
  9. package/lib/actions/images.js +83 -182
  10. package/lib/actions/index.d.ts +2 -0
  11. package/lib/actions/index.js +2 -0
  12. package/lib/actions/movie.js +3 -1
  13. package/lib/actions/pdf.js +5 -2
  14. package/lib/agents/image_google_agent.d.ts +2 -15
  15. package/lib/agents/image_google_agent.js +3 -3
  16. package/lib/agents/image_openai_agent.d.ts +2 -17
  17. package/lib/agents/image_openai_agent.js +7 -7
  18. package/lib/agents/movie_google_agent.d.ts +2 -17
  19. package/lib/agents/movie_google_agent.js +7 -7
  20. package/lib/agents/movie_replicate_agent.d.ts +2 -16
  21. package/lib/agents/movie_replicate_agent.js +3 -3
  22. package/lib/agents/tts_google_agent.d.ts +9 -1
  23. package/lib/agents/tts_google_agent.js +2 -2
  24. package/lib/agents/tts_nijivoice_agent.js +1 -1
  25. package/lib/agents/tts_openai_agent.d.ts +13 -1
  26. package/lib/agents/tts_openai_agent.js +2 -2
  27. package/lib/cli/helpers.js +7 -7
  28. package/lib/methods/index.d.ts +1 -0
  29. package/lib/methods/index.js +1 -0
  30. package/lib/methods/mulmo_beat.d.ts +6 -0
  31. package/lib/methods/mulmo_beat.js +21 -0
  32. package/lib/methods/mulmo_presentation_style.d.ts +2 -0
  33. package/lib/methods/mulmo_presentation_style.js +24 -0
  34. package/lib/methods/mulmo_studio_context.js +3 -0
  35. package/lib/tools/story_to_script.js +2 -2
  36. package/lib/types/agent.d.ts +55 -0
  37. package/lib/types/agent.js +3 -0
  38. package/lib/types/schema.d.ts +322 -74
  39. package/lib/types/schema.js +10 -2
  40. package/lib/types/type.d.ts +3 -2
  41. package/lib/utils/context.d.ts +13 -2
  42. package/lib/utils/context.js +2 -0
  43. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  44. package/lib/utils/ffmpeg_utils.js +1 -1
  45. package/lib/utils/file.js +4 -4
  46. package/lib/utils/filters.js +11 -7
  47. package/lib/utils/markdown.js +1 -1
  48. package/lib/utils/preprocess.d.ts +8 -2
  49. package/lib/utils/string.js +5 -5
  50. package/lib/utils/utils.d.ts +8 -1
  51. package/lib/utils/utils.js +51 -36
  52. package/package.json +10 -9
  53. package/scripts/templates/html.json +42 -0
  54. package/scripts/templates/image_refs.json +35 -0
@@ -1,88 +1,32 @@
1
1
  import dotenv from "dotenv";
2
2
  import fs from "fs";
3
3
  import { GraphAI, GraphAILogger, TaskManager } from "graphai";
4
+ import { GoogleAuth } from "google-auth-library";
4
5
  import * as agents from "@graphai/vanilla";
5
6
  import { openAIAgent } from "@graphai/openai_agent";
6
7
  import { anthropicAgent } from "@graphai/anthropic_agent";
7
8
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
8
- import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
9
- import { fileCacheAgentFilter } from "../utils/filters.js";
10
9
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
11
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
12
- import { findImagePlugin } from "../utils/image_plugins/index.js";
13
- import { userAssert, settings2GraphAIConfig, getExtention } from "../utils/utils.js";
14
- import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
15
- import { defaultOpenAIImageModel } from "../utils/const.js";
16
- import { renderHTMLToImage } from "../utils/markdown.js";
17
- const vanillaAgents = agents.default ?? agents;
18
- dotenv.config();
19
- import { GoogleAuth } from "google-auth-library";
11
+ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
12
+ import { fileCacheAgentFilter } from "../utils/filters.js";
13
+ import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
20
14
  import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
21
- const htmlStyle = (context, beat) => {
22
- return {
23
- canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
24
- textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
25
- };
26
- };
27
- export const imagePreprocessAgent = async (namedInputs) => {
28
- const { context, beat, index, imageRefs } = namedInputs;
29
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
30
- // const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
31
- const imagePath = getBeatPngImagePath(context, index);
32
- const returnValue = {
33
- imageParams: imageAgentInfo.imageParams,
34
- movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
35
- };
36
- if (beat.image) {
37
- const plugin = findImagePlugin(beat?.image?.type);
38
- if (!plugin) {
39
- throw new Error(`invalid beat image type: ${beat.image}`);
40
- }
41
- const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
42
- // undefined prompt indicates that image generation is not needed
43
- return { imagePath: path, referenceImage: path, ...returnValue };
44
- }
45
- if (beat.htmlPrompt) {
46
- const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
47
- return { imagePath, htmlPrompt, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
48
- }
49
- // images for "edit_image"
50
- const images = (() => {
51
- const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
52
- const sources = imageNames.map((name) => imageRefs[name]);
53
- return sources.filter((source) => source !== undefined);
54
- })();
55
- if (beat.moviePrompt && !beat.imagePrompt) {
56
- return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
57
- }
58
- const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
59
- return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
60
- };
61
- export const imagePluginAgent = async (namedInputs) => {
62
- const { context, beat, index } = namedInputs;
63
- const imagePath = getBeatPngImagePath(context, index);
64
- const plugin = findImagePlugin(beat?.image?.type);
65
- if (!plugin) {
66
- throw new Error(`invalid beat image type: ${beat.image}`);
67
- }
68
- try {
69
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
70
- const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
71
- await plugin.process(processorParams);
72
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
73
- }
74
- catch (error) {
75
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
76
- throw error;
77
- }
78
- };
79
- const htmlImageGeneratorAgent = async (namedInputs) => {
80
- const { html, file, canvasSize } = namedInputs;
81
- // Save HTML file
82
- const htmlFile = file.replace(/\.[^/.]+$/, ".html");
83
- await fs.promises.writeFile(htmlFile, html, "utf8");
84
- await renderHTMLToImage(html, file, canvasSize.width, canvasSize.height);
15
+ import { getImageRefs } from "./image_references.js";
16
+ import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
+ const vanillaAgents = agents.default ?? agents;
18
+ const imageAgents = {
19
+ ...vanillaAgents,
20
+ imageGoogleAgent,
21
+ movieGoogleAgent,
22
+ movieReplicateAgent,
23
+ imageOpenaiAgent,
24
+ mediaMockAgent,
25
+ fileWriteAgent,
26
+ openAIAgent,
27
+ anthropicAgent,
85
28
  };
29
+ dotenv.config();
86
30
  const beat_graph_data = {
87
31
  version: 0.5,
88
32
  concurrency: 4,
@@ -93,6 +37,8 @@ const beat_graph_data = {
93
37
  imageRefs: {},
94
38
  beat: {},
95
39
  __mapIndex: {},
40
+ forceMovie: { value: false },
41
+ forceImage: { value: false },
96
42
  preprocessor: {
97
43
  agent: imagePreprocessAgent,
98
44
  inputs: {
@@ -110,7 +56,7 @@ const beat_graph_data = {
110
56
  context: ":context",
111
57
  beat: ":beat",
112
58
  index: ":__mapIndex",
113
- onComplete: ":preprocessor",
59
+ onComplete: [":preprocessor"],
114
60
  },
115
61
  },
116
62
  htmlImageAgent: {
@@ -124,19 +70,38 @@ const beat_graph_data = {
124
70
  model: ":htmlImageAgentInfo.model",
125
71
  max_tokens: ":htmlImageAgentInfo.max_tokens",
126
72
  },
73
+ cache: {
74
+ force: [":context.force", ":forceImage"],
75
+ file: ":preprocessor.htmlPath",
76
+ index: ":__mapIndex",
77
+ mulmoContext: ":context",
78
+ sessionType: "html",
79
+ },
127
80
  },
128
81
  },
82
+ htmlReader: {
83
+ if: ":preprocessor.htmlPrompt",
84
+ agent: async (namedInputs) => {
85
+ const html = await fs.promises.readFile(namedInputs.htmlPath, "utf8");
86
+ return { html };
87
+ },
88
+ inputs: {
89
+ onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
90
+ htmlPath: ":preprocessor.htmlPath",
91
+ },
92
+ output: {
93
+ htmlText: ".html.codeBlockOrRaw()",
94
+ },
95
+ defaultValue: {},
96
+ },
129
97
  htmlImageGenerator: {
130
98
  if: ":preprocessor.htmlPrompt",
131
99
  defaultValue: {},
132
100
  agent: htmlImageGeneratorAgent,
133
101
  inputs: {
134
- html: ":htmlImageAgent.text.codeBlockOrRaw()",
102
+ htmlText: ":htmlReader.htmlText",
135
103
  canvasSize: ":context.presentationStyle.canvasSize",
136
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
137
- mulmoContext: ":context", // for fileCacheAgentFilter
138
- index: ":__mapIndex", // for fileCacheAgentFilter
139
- sessionType: "image", // for fileCacheAgentFilter
104
+ file: ":preprocessor.imagePath",
140
105
  },
141
106
  },
142
107
  imageGenerator: {
@@ -145,12 +110,14 @@ const beat_graph_data = {
145
110
  retry: 2,
146
111
  inputs: {
147
112
  prompt: ":preprocessor.prompt",
148
- images: ":preprocessor.images",
149
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
150
- force: ":context.force", // only for fileCacheAgentFilter
151
- mulmoContext: ":context", // for fileCacheAgentFilter
152
- index: ":__mapIndex", // for fileCacheAgentFilter
153
- sessionType: "image", // for fileCacheAgentFilter
113
+ referenceImages: ":preprocessor.referenceImages",
114
+ cache: {
115
+ force: [":context.force", ":forceImage"],
116
+ file: ":preprocessor.imagePath",
117
+ index: ":__mapIndex",
118
+ mulmoContext: ":context",
119
+ sessionType: "image",
120
+ },
154
121
  params: {
155
122
  model: ":preprocessor.imageParams.model",
156
123
  moderation: ":preprocessor.imageParams.moderation",
@@ -165,12 +132,14 @@ const beat_graph_data = {
165
132
  inputs: {
166
133
  onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
167
134
  prompt: ":beat.moviePrompt",
168
- imagePath: ":preprocessor.referenceImage",
169
- file: ":preprocessor.movieFile",
170
- studio: ":context.studio", // for cache
171
- mulmoContext: ":context", // for fileCacheAgentFilter
172
- index: ":__mapIndex", // for cache
173
- sessionType: "movie", // for cache
135
+ imagePath: ":preprocessor.referenceImageForMovie",
136
+ cache: {
137
+ force: [":context.force", ":forceMovie"],
138
+ file: ":preprocessor.movieFile",
139
+ index: ":__mapIndex",
140
+ sessionType: "movie",
141
+ mulmoContext: ":context",
142
+ },
174
143
  params: {
175
144
  model: ":context.presentationStyle.movieParams.model",
176
145
  duration: ":beat.duration",
@@ -182,15 +151,14 @@ const beat_graph_data = {
182
151
  imageFromMovie: {
183
152
  if: ":preprocessor.imageFromMovie",
184
153
  agent: async (namedInputs) => {
185
- await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
186
- return { generatedImage: true };
154
+ return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
187
155
  },
188
156
  inputs: {
189
- onComplete: ":movieGenerator", // to wait for movieGenerator to finish
157
+ onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
190
158
  imageFile: ":preprocessor.imagePath",
191
159
  movieFile: ":preprocessor.movieFile",
192
160
  },
193
- defaultValue: { generatedImage: false },
161
+ defaultValue: {},
194
162
  },
195
163
  output: {
196
164
  agent: "copyAgent",
@@ -290,21 +258,19 @@ const googleAuth = async () => {
290
258
  throw error;
291
259
  }
292
260
  };
293
- const graphOption = async (context, settings) => {
294
- const agentFilters = [
295
- {
296
- name: "fileCacheAgentFilter",
297
- agent: fileCacheAgentFilter,
298
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator"],
299
- },
300
- ];
301
- const taskManager = new TaskManager(getConcurrency(context));
261
+ export const graphOption = async (context, settings) => {
302
262
  const options = {
303
- agentFilters,
304
- taskManager,
263
+ agentFilters: [
264
+ {
265
+ name: "fileCacheAgentFilter",
266
+ agent: fileCacheAgentFilter,
267
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
268
+ },
269
+ ],
270
+ taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
305
271
  };
306
272
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
307
- const config = settings2GraphAIConfig(settings);
273
+ const config = settings2GraphAIConfig(settings, process.env);
308
274
  // We need to get google's auth token only if the google is the text2image provider.
309
275
  if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
310
276
  userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
@@ -322,32 +288,6 @@ const graphOption = async (context, settings) => {
322
288
  options.config = config;
323
289
  return options;
324
290
  };
325
- // TODO: unit test
326
- export const getImageRefs = async (context) => {
327
- const imageRefs = {};
328
- const images = context.presentationStyle.imageParams?.images;
329
- if (images) {
330
- await Promise.all(Object.keys(images).map(async (key) => {
331
- const image = images[key];
332
- if (image.source.kind === "path") {
333
- imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
334
- }
335
- else if (image.source.kind === "url") {
336
- const response = await fetch(image.source.url);
337
- if (!response.ok) {
338
- throw new Error(`Failed to download image: ${image.source.url}`);
339
- }
340
- const buffer = Buffer.from(await response.arrayBuffer());
341
- // Detect file extension from Content-Type header or URL
342
- const extension = getExtention(response.headers.get("content-type"), image.source.url);
343
- const imagePath = getReferenceImagePath(context, key, extension);
344
- await fs.promises.writeFile(imagePath, buffer);
345
- imageRefs[key] = imagePath;
346
- }
347
- }));
348
- }
349
- return imageRefs;
350
- };
351
291
  const prepareGenerateImages = async (context) => {
352
292
  const fileName = MulmoStudioContextMethods.getFileName(context);
353
293
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -356,56 +296,22 @@ const prepareGenerateImages = async (context) => {
356
296
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
357
297
  const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
358
298
  const imageRefs = await getImageRefs(context);
359
- // Determine movie agent based on provider
360
- const getMovieAgent = () => {
361
- const provider = context.presentationStyle.movieParams?.provider ?? "google";
362
- switch (provider) {
363
- case "replicate":
364
- return "movieReplicateAgent";
365
- case "google":
366
- default:
367
- return "movieGoogleAgent";
368
- }
369
- };
370
299
  GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
371
300
  const injections = {
372
301
  context,
373
302
  htmlImageAgentInfo,
374
303
  movieAgentInfo: {
375
- agent: getMovieAgent(),
304
+ agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
376
305
  },
377
306
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
378
307
  imageRefs,
379
308
  };
380
309
  return injections;
381
310
  };
382
- const getConcurrency = (context) => {
383
- if (context.presentationStyle.movieParams?.provider === "replicate") {
384
- return 4;
385
- }
386
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
387
- if (imageAgentInfo.imageParams.provider === "openai") {
388
- // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
389
- // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
390
- // gpt-image-1:3,000,000 TPM、150 images per minute
391
- return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
392
- }
393
- return 4;
394
- };
395
311
  const generateImages = async (context, settings, callbacks) => {
396
312
  const options = await graphOption(context, settings);
397
313
  const injections = await prepareGenerateImages(context);
398
- const graph = new GraphAI(graph_data, {
399
- ...vanillaAgents,
400
- imageGoogleAgent,
401
- movieGoogleAgent,
402
- movieReplicateAgent,
403
- imageOpenaiAgent,
404
- mediaMockAgent,
405
- fileWriteAgent,
406
- openAIAgent,
407
- anthropicAgent,
408
- }, options);
314
+ const graph = new GraphAI(graph_data, imageAgents, options);
409
315
  Object.keys(injections).forEach((key) => {
410
316
  graph.injectValue(key, injections[key]);
411
317
  });
@@ -417,6 +323,7 @@ const generateImages = async (context, settings, callbacks) => {
417
323
  const res = await graph.run();
418
324
  return res.mergeResult;
419
325
  };
326
+ // public api
420
327
  export const images = async (context, settings, callbacks) => {
421
328
  try {
422
329
  MulmoStudioContextMethods.setSessionState(context, "image", true);
@@ -429,20 +336,12 @@ export const images = async (context, settings, callbacks) => {
429
336
  throw error;
430
337
  }
431
338
  };
432
- export const generateBeatImage = async (index, context, settings, callbacks) => {
339
+ // public api
340
+ export const generateBeatImage = async (inputs) => {
341
+ const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
433
342
  const options = await graphOption(context, settings);
434
343
  const injections = await prepareGenerateImages(context);
435
- const graph = new GraphAI(beat_graph_data, {
436
- ...vanillaAgents,
437
- imageGoogleAgent,
438
- movieGoogleAgent,
439
- movieReplicateAgent,
440
- imageOpenaiAgent,
441
- mediaMockAgent,
442
- fileWriteAgent,
443
- openAIAgent,
444
- anthropicAgent,
445
- }, options);
344
+ const graph = new GraphAI(beat_graph_data, imageAgents, options);
446
345
  Object.keys(injections).forEach((key) => {
447
346
  if ("outputStudioFilePath" !== key) {
448
347
  graph.injectValue(key, injections[key]);
@@ -450,6 +349,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
450
349
  });
451
350
  graph.injectValue("__mapIndex", index);
452
351
  graph.injectValue("beat", context.studio.script.beats[index]);
352
+ graph.injectValue("forceMovie", forceMovie ?? false);
353
+ graph.injectValue("forceImage", forceImage ?? false);
453
354
  if (callbacks) {
454
355
  callbacks.forEach((callback) => {
455
356
  graph.registerCallback(callback);
@@ -1,5 +1,7 @@
1
1
  export * from "./audio.js";
2
2
  export * from "./images.js";
3
+ export * from "./image_references.js";
4
+ export * from "./image_agents.js";
3
5
  export * from "./movie.js";
4
6
  export * from "./pdf.js";
5
7
  export * from "./translate.js";
@@ -1,5 +1,7 @@
1
1
  export * from "./audio.js";
2
2
  export * from "./images.js";
3
+ export * from "./image_references.js";
4
+ export * from "./image_agents.js";
3
5
  export * from "./movie.js";
4
6
  export * from "./pdf.js";
5
7
  export * from "./translate.js";
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
218
218
  // Concatenate the trimmed images
219
219
  const concatVideoId = "concat_video";
220
220
  const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
221
- ffmpegContext.filterComplex.push(`${videoIds.map((id) => `[${id}]`).join("")}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`);
221
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
222
+ const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
223
+ ffmpegContext.filterComplex.push(filter);
222
224
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
223
225
  const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
224
226
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
@@ -1,6 +1,7 @@
1
1
  import fs from "fs";
2
2
  import path from "path";
3
3
  import puppeteer from "puppeteer";
4
+ import { GraphAILogger } from "graphai";
4
5
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
5
6
  import { localizedText, isHttp } from "../utils/utils.js";
6
7
  import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
17
18
  const mimeType = ext === "jpg" ? "jpeg" : ext;
18
19
  return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
19
20
  }
20
- catch (__error) {
21
+ catch (error) {
22
+ GraphAILogger.info("loadImage failed", error);
21
23
  const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
22
24
  return `data:image/png;base64,${placeholderData.toString("base64")}`;
23
25
  }
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
101
103
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
102
104
  const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
103
105
  const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
104
- const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
106
+ const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
107
+ const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
105
108
  const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
106
109
  const template = getHTMLFile(`pdf_${pdfMode}`);
107
110
  const baseTemplateData = {
@@ -1,18 +1,5 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
- export type ImageGoogleConfig = {
3
- projectId?: string;
4
- token?: string;
5
- };
6
- export declare const imageGoogleAgent: AgentFunction<{
7
- model: string;
8
- canvasSize: {
9
- width: number;
10
- height: number;
11
- };
12
- }, {
13
- buffer: Buffer;
14
- }, {
15
- prompt: string;
16
- }, ImageGoogleConfig>;
2
+ import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
3
+ export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
17
4
  declare const imageGoogleAgentInfo: AgentFunctionInfo;
18
5
  export default imageGoogleAgentInfo;
@@ -7,12 +7,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
7
7
  const payload = {
8
8
  instances: [
9
9
  {
10
- prompt: prompt,
10
+ prompt,
11
11
  },
12
12
  ],
13
13
  parameters: {
14
14
  sampleCount: 1,
15
- aspectRatio: aspectRatio,
15
+ aspectRatio,
16
16
  safetySetting: "block_only_high",
17
17
  },
18
18
  };
@@ -51,7 +51,7 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
51
51
  throw error;
52
52
  }
53
53
  }
54
- export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
54
+ export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
55
55
  const { prompt } = namedInputs;
56
56
  const aspectRatio = getAspectRatio(params.canvasSize);
57
57
  const model = params.model ?? "imagen-3.0-fast-generate-001";
@@ -1,20 +1,5 @@
1
1
  import { AgentFunction, AgentFunctionInfo } from "graphai";
2
- type OpenAIModeration = "low" | "auto";
3
- export declare const imageOpenaiAgent: AgentFunction<{
4
- apiKey: string;
5
- model: string;
6
- moderation: OpenAIModeration | null | undefined;
7
- canvasSize: {
8
- width: number;
9
- height: number;
10
- };
11
- }, {
12
- buffer: Buffer;
13
- }, {
14
- prompt: string;
15
- images: string[] | null | undefined;
16
- }, {
17
- apiKey?: string;
18
- }>;
2
+ import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
3
+ export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
19
4
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
20
5
  export default imageOpenaiAgentInfo;
@@ -4,12 +4,12 @@ import { GraphAILogger } from "graphai";
4
4
  import OpenAI, { toFile } from "openai";
5
5
  import { defaultOpenAIImageModel } from "../utils/const.js";
6
6
  // https://platform.openai.com/docs/guides/image-generation
7
- export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
8
- const { prompt, images } = namedInputs;
7
+ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
8
+ const { prompt, referenceImages } = namedInputs;
9
9
  const { moderation, canvasSize } = params;
10
- const { apiKey } = { ...config };
10
+ const { apiKey, baseURL } = { ...config };
11
11
  const model = params.model ?? defaultOpenAIImageModel;
12
- const openai = new OpenAI({ apiKey });
12
+ const openai = new OpenAI({ apiKey, baseURL });
13
13
  const size = (() => {
14
14
  if (model === "gpt-image-1") {
15
15
  if (canvasSize.width > canvasSize.height) {
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
46
46
  const response = await (async () => {
47
47
  try {
48
48
  const targetSize = imageOptions.size;
49
- if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
50
- const imagelist = await Promise.all((images ?? []).map(async (file) => {
49
+ if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
50
+ const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
51
51
  const ext = path.extname(file).toLowerCase();
52
52
  const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
53
53
  return await toFile(fs.createReadStream(file), null, { type });
54
54
  }));
55
- return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
55
+ return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
56
56
  }
57
57
  else {
58
58
  return await openai.images.generate(imageOptions);
@@ -1,24 +1,9 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
- export type MovieGoogleConfig = {
3
- projectId?: string;
4
- token?: string;
5
- };
2
+ import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
6
3
  export declare const getAspectRatio: (canvasSize: {
7
4
  width: number;
8
5
  height: number;
9
6
  }) => string;
10
- export declare const movieGoogleAgent: AgentFunction<{
11
- model: string;
12
- canvasSize: {
13
- width: number;
14
- height: number;
15
- };
16
- duration?: number;
17
- }, {
18
- buffer: Buffer;
19
- }, {
20
- prompt: string;
21
- imagePath?: string;
22
- }, MovieGoogleConfig>;
7
+ export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
23
8
  declare const movieGoogleAgentInfo: AgentFunctionInfo;
24
9
  export default movieGoogleAgentInfo;
@@ -5,13 +5,13 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
5
5
  const payload = {
6
6
  instances: [
7
7
  {
8
- prompt: prompt,
8
+ prompt,
9
9
  image: undefined,
10
10
  },
11
11
  ],
12
12
  parameters: {
13
13
  sampleCount: 1,
14
- aspectRatio: aspectRatio,
14
+ aspectRatio,
15
15
  safetySetting: "block_only_high",
16
16
  personGeneration: "allow_all",
17
17
  durationSeconds: duration,
@@ -46,7 +46,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
46
46
  while (true) {
47
47
  GraphAILogger.info("...waiting for movie generation...");
48
48
  await sleep(3000);
49
- const response = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:fetchPredictOperation`, {
49
+ const operationResponse = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:fetchPredictOperation`, {
50
50
  method: "POST",
51
51
  headers: {
52
52
  Authorization: `Bearer ${token}`,
@@ -54,10 +54,10 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
54
54
  },
55
55
  body: JSON.stringify(fetchBody),
56
56
  });
57
- if (!response.ok) {
58
- throw new Error(`Error: ${response.status} - ${response.statusText}`);
57
+ if (!operationResponse.ok) {
58
+ throw new Error(`Error: ${operationResponse.status} - ${operationResponse.statusText}`);
59
59
  }
60
- const responseData = await response.json();
60
+ const responseData = await operationResponse.json();
61
61
  if (responseData.done) {
62
62
  if (responseData.error) {
63
63
  GraphAILogger.info("Prompt: ", prompt);
@@ -87,7 +87,7 @@ export const getAspectRatio = (canvasSize) => {
87
87
  return "1:1";
88
88
  }
89
89
  };
90
- export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
90
+ export const movieGoogleAgent = async ({ namedInputs, params, config, }) => {
91
91
  const { prompt, imagePath } = namedInputs;
92
92
  const aspectRatio = getAspectRatio(params.canvasSize);
93
93
  const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
@@ -1,23 +1,9 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentParams, ReplicateMovieAgentConfig } from "../types/agent.js";
2
3
  export declare const getAspectRatio: (canvasSize: {
3
4
  width: number;
4
5
  height: number;
5
6
  }) => string;
6
- export type MovieReplicateConfig = {
7
- apiKey?: string;
8
- };
9
- export declare const movieReplicateAgent: AgentFunction<{
10
- model: `${string}/${string}` | undefined;
11
- canvasSize: {
12
- width: number;
13
- height: number;
14
- };
15
- duration?: number;
16
- }, {
17
- buffer: Buffer;
18
- }, {
19
- prompt: string;
20
- imagePath?: string;
21
- }, MovieReplicateConfig>;
7
+ export declare const movieReplicateAgent: AgentFunction<ReplicateMovieAgentParams, AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentConfig>;
22
8
  declare const movieReplicateAgentInfo: AgentFunctionInfo;
23
9
  export default movieReplicateAgentInfo;