mulmocast 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/assets/templates/characters.json +16 -0
  2. package/assets/templates/html.json +6 -0
  3. package/lib/actions/audio.js +8 -6
  4. package/lib/actions/image_agents.d.ts +121 -0
  5. package/lib/actions/image_agents.js +56 -0
  6. package/lib/actions/image_references.d.ts +9 -0
  7. package/lib/actions/image_references.js +79 -0
  8. package/lib/actions/images.d.ts +9 -109
  9. package/lib/actions/images.js +68 -184
  10. package/lib/actions/index.d.ts +2 -0
  11. package/lib/actions/index.js +2 -0
  12. package/lib/actions/movie.js +3 -1
  13. package/lib/actions/pdf.js +5 -2
  14. package/lib/agents/image_google_agent.d.ts +2 -15
  15. package/lib/agents/image_google_agent.js +3 -3
  16. package/lib/agents/image_openai_agent.d.ts +2 -17
  17. package/lib/agents/image_openai_agent.js +7 -7
  18. package/lib/agents/movie_google_agent.d.ts +2 -17
  19. package/lib/agents/movie_google_agent.js +7 -7
  20. package/lib/agents/movie_replicate_agent.d.ts +2 -16
  21. package/lib/agents/movie_replicate_agent.js +3 -3
  22. package/lib/agents/tts_google_agent.d.ts +9 -1
  23. package/lib/agents/tts_google_agent.js +2 -2
  24. package/lib/agents/tts_nijivoice_agent.js +1 -1
  25. package/lib/agents/tts_openai_agent.d.ts +13 -1
  26. package/lib/agents/tts_openai_agent.js +2 -2
  27. package/lib/cli/helpers.js +7 -7
  28. package/lib/methods/index.d.ts +1 -0
  29. package/lib/methods/index.js +1 -0
  30. package/lib/methods/mulmo_beat.d.ts +6 -0
  31. package/lib/methods/mulmo_beat.js +21 -0
  32. package/lib/methods/mulmo_presentation_style.d.ts +2 -0
  33. package/lib/methods/mulmo_presentation_style.js +24 -0
  34. package/lib/methods/mulmo_studio_context.js +3 -0
  35. package/lib/tools/story_to_script.js +2 -2
  36. package/lib/types/agent.d.ts +55 -0
  37. package/lib/types/agent.js +3 -0
  38. package/lib/types/schema.d.ts +317 -74
  39. package/lib/types/schema.js +9 -2
  40. package/lib/types/type.d.ts +3 -2
  41. package/lib/utils/context.d.ts +12 -2
  42. package/lib/utils/context.js +1 -0
  43. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  44. package/lib/utils/ffmpeg_utils.js +1 -1
  45. package/lib/utils/file.js +4 -4
  46. package/lib/utils/filters.js +3 -4
  47. package/lib/utils/markdown.js +1 -1
  48. package/lib/utils/preprocess.d.ts +8 -2
  49. package/lib/utils/string.js +5 -5
  50. package/lib/utils/utils.d.ts +8 -1
  51. package/lib/utils/utils.js +51 -36
  52. package/package.json +7 -6
  53. package/scripts/templates/html.json +42 -0
  54. package/scripts/templates/image_refs.json +35 -0
@@ -1,86 +1,32 @@
1
1
  import dotenv from "dotenv";
2
2
  import fs from "fs";
3
3
  import { GraphAI, GraphAILogger, TaskManager } from "graphai";
4
+ import { GoogleAuth } from "google-auth-library";
4
5
  import * as agents from "@graphai/vanilla";
5
6
  import { openAIAgent } from "@graphai/openai_agent";
6
7
  import { anthropicAgent } from "@graphai/anthropic_agent";
7
8
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
8
- import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
9
- import { fileCacheAgentFilter } from "../utils/filters.js";
10
9
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
11
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
12
- import { findImagePlugin } from "../utils/image_plugins/index.js";
13
- import { userAssert, settings2GraphAIConfig, getExtention } from "../utils/utils.js";
14
- import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
15
- import { defaultOpenAIImageModel } from "../utils/const.js";
16
- import { renderHTMLToImage } from "../utils/markdown.js";
17
- const vanillaAgents = agents.default ?? agents;
18
- dotenv.config();
19
- import { GoogleAuth } from "google-auth-library";
11
+ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
12
+ import { fileCacheAgentFilter } from "../utils/filters.js";
13
+ import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
20
14
  import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
21
- const htmlStyle = (context, beat) => {
22
- return {
23
- canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
24
- textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
25
- };
26
- };
27
- export const imagePreprocessAgent = async (namedInputs) => {
28
- const { context, beat, index, imageRefs } = namedInputs;
29
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
30
- // const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
31
- const imagePath = getBeatPngImagePath(context, index);
32
- const returnValue = {
33
- imageParams: imageAgentInfo.imageParams,
34
- movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
35
- };
36
- if (beat.image) {
37
- const plugin = findImagePlugin(beat?.image?.type);
38
- if (!plugin) {
39
- throw new Error(`invalid beat image type: ${beat.image}`);
40
- }
41
- const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
42
- // undefined prompt indicates that image generation is not needed
43
- return { imagePath: path, referenceImage: path, ...returnValue };
44
- }
45
- if (beat.htmlPrompt) {
46
- const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
47
- const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
48
- return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
49
- }
50
- // images for "edit_image"
51
- const images = (() => {
52
- const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
53
- const sources = imageNames.map((name) => imageRefs[name]);
54
- return sources.filter((source) => source !== undefined);
55
- })();
56
- if (beat.moviePrompt && !beat.imagePrompt) {
57
- return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
58
- }
59
- const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
60
- return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
61
- };
62
- export const imagePluginAgent = async (namedInputs) => {
63
- const { context, beat, index } = namedInputs;
64
- const imagePath = getBeatPngImagePath(context, index);
65
- const plugin = findImagePlugin(beat?.image?.type);
66
- if (!plugin) {
67
- throw new Error(`invalid beat image type: ${beat.image}`);
68
- }
69
- try {
70
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
71
- const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
72
- await plugin.process(processorParams);
73
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
74
- }
75
- catch (error) {
76
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
77
- throw error;
78
- }
79
- };
80
- const htmlImageGeneratorAgent = async (namedInputs) => {
81
- const { file, canvasSize, htmlText } = namedInputs;
82
- await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
15
+ import { getImageRefs } from "./image_references.js";
16
+ import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
17
+ const vanillaAgents = agents.default ?? agents;
18
+ const imageAgents = {
19
+ ...vanillaAgents,
20
+ imageGoogleAgent,
21
+ movieGoogleAgent,
22
+ movieReplicateAgent,
23
+ imageOpenaiAgent,
24
+ mediaMockAgent,
25
+ fileWriteAgent,
26
+ openAIAgent,
27
+ anthropicAgent,
83
28
  };
29
+ dotenv.config();
84
30
  const beat_graph_data = {
85
31
  version: 0.5,
86
32
  concurrency: 4,
@@ -91,6 +37,8 @@ const beat_graph_data = {
91
37
  imageRefs: {},
92
38
  beat: {},
93
39
  __mapIndex: {},
40
+ forceMovie: { value: false },
41
+ forceImage: { value: false },
94
42
  preprocessor: {
95
43
  agent: imagePreprocessAgent,
96
44
  inputs: {
@@ -108,7 +56,7 @@ const beat_graph_data = {
108
56
  context: ":context",
109
57
  beat: ":beat",
110
58
  index: ":__mapIndex",
111
- onComplete: ":preprocessor",
59
+ onComplete: [":preprocessor"],
112
60
  },
113
61
  },
114
62
  htmlImageAgent: {
@@ -122,10 +70,13 @@ const beat_graph_data = {
122
70
  model: ":htmlImageAgentInfo.model",
123
71
  max_tokens: ":htmlImageAgentInfo.max_tokens",
124
72
  },
125
- file: ":preprocessor.htmlPath", // only for fileCacheAgentFilter
126
- mulmoContext: ":context", // for fileCacheAgentFilter
127
- index: ":__mapIndex", // for fileCacheAgentFilter
128
- sessionType: "html", // for fileCacheAgentFilter
73
+ cache: {
74
+ force: [":context.force", ":forceImage"],
75
+ file: ":preprocessor.htmlPath",
76
+ index: ":__mapIndex",
77
+ mulmoContext: ":context",
78
+ sessionType: "html",
79
+ },
129
80
  },
130
81
  },
131
82
  htmlReader: {
@@ -135,7 +86,7 @@ const beat_graph_data = {
135
86
  return { html };
136
87
  },
137
88
  inputs: {
138
- onComplete: ":htmlImageAgent", // to wait for htmlImageAgent to finish
89
+ onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
139
90
  htmlPath: ":preprocessor.htmlPath",
140
91
  },
141
92
  output: {
@@ -150,10 +101,7 @@ const beat_graph_data = {
150
101
  inputs: {
151
102
  htmlText: ":htmlReader.htmlText",
152
103
  canvasSize: ":context.presentationStyle.canvasSize",
153
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
154
- mulmoContext: ":context", // for fileCacheAgentFilter
155
- index: ":__mapIndex", // for fileCacheAgentFilter
156
- sessionType: "image", // for fileCacheAgentFilter
104
+ file: ":preprocessor.imagePath",
157
105
  },
158
106
  },
159
107
  imageGenerator: {
@@ -162,12 +110,14 @@ const beat_graph_data = {
162
110
  retry: 2,
163
111
  inputs: {
164
112
  prompt: ":preprocessor.prompt",
165
- images: ":preprocessor.images",
166
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
167
- force: ":context.force", // only for fileCacheAgentFilter
168
- mulmoContext: ":context", // for fileCacheAgentFilter
169
- index: ":__mapIndex", // for fileCacheAgentFilter
170
- sessionType: "image", // for fileCacheAgentFilter
113
+ referenceImages: ":preprocessor.referenceImages",
114
+ cache: {
115
+ force: [":context.force", ":forceImage"],
116
+ file: ":preprocessor.imagePath",
117
+ index: ":__mapIndex",
118
+ mulmoContext: ":context",
119
+ sessionType: "image",
120
+ },
171
121
  params: {
172
122
  model: ":preprocessor.imageParams.model",
173
123
  moderation: ":preprocessor.imageParams.moderation",
@@ -182,12 +132,14 @@ const beat_graph_data = {
182
132
  inputs: {
183
133
  onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
184
134
  prompt: ":beat.moviePrompt",
185
- imagePath: ":preprocessor.referenceImage",
186
- file: ":preprocessor.movieFile",
187
- studio: ":context.studio", // for cache
188
- mulmoContext: ":context", // for fileCacheAgentFilter
189
- index: ":__mapIndex", // for cache
190
- sessionType: "movie", // for cache
135
+ imagePath: ":preprocessor.referenceImageForMovie",
136
+ cache: {
137
+ force: [":context.force", ":forceMovie"],
138
+ file: ":preprocessor.movieFile",
139
+ index: ":__mapIndex",
140
+ sessionType: "movie",
141
+ mulmoContext: ":context",
142
+ },
191
143
  params: {
192
144
  model: ":context.presentationStyle.movieParams.model",
193
145
  duration: ":beat.duration",
@@ -199,15 +151,14 @@ const beat_graph_data = {
199
151
  imageFromMovie: {
200
152
  if: ":preprocessor.imageFromMovie",
201
153
  agent: async (namedInputs) => {
202
- await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
203
- return { generatedImage: true };
154
+ return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
204
155
  },
205
156
  inputs: {
206
- onComplete: ":movieGenerator", // to wait for movieGenerator to finish
157
+ onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
207
158
  imageFile: ":preprocessor.imagePath",
208
159
  movieFile: ":preprocessor.movieFile",
209
160
  },
210
- defaultValue: { generatedImage: false },
161
+ defaultValue: {},
211
162
  },
212
163
  output: {
213
164
  agent: "copyAgent",
@@ -307,21 +258,19 @@ const googleAuth = async () => {
307
258
  throw error;
308
259
  }
309
260
  };
310
- const graphOption = async (context, settings) => {
311
- const agentFilters = [
312
- {
313
- name: "fileCacheAgentFilter",
314
- agent: fileCacheAgentFilter,
315
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator", "htmlImageAgent"],
316
- },
317
- ];
318
- const taskManager = new TaskManager(getConcurrency(context));
261
+ export const graphOption = async (context, settings) => {
319
262
  const options = {
320
- agentFilters,
321
- taskManager,
263
+ agentFilters: [
264
+ {
265
+ name: "fileCacheAgentFilter",
266
+ agent: fileCacheAgentFilter,
267
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
268
+ },
269
+ ],
270
+ taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
322
271
  };
323
272
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
324
- const config = settings2GraphAIConfig(settings);
273
+ const config = settings2GraphAIConfig(settings, process.env);
325
274
  // We need to get google's auth token only if the google is the text2image provider.
326
275
  if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
327
276
  userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
@@ -339,32 +288,6 @@ const graphOption = async (context, settings) => {
339
288
  options.config = config;
340
289
  return options;
341
290
  };
342
- // TODO: unit test
343
- export const getImageRefs = async (context) => {
344
- const imageRefs = {};
345
- const images = context.presentationStyle.imageParams?.images;
346
- if (images) {
347
- await Promise.all(Object.keys(images).map(async (key) => {
348
- const image = images[key];
349
- if (image.source.kind === "path") {
350
- imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
351
- }
352
- else if (image.source.kind === "url") {
353
- const response = await fetch(image.source.url);
354
- if (!response.ok) {
355
- throw new Error(`Failed to download image: ${image.source.url}`);
356
- }
357
- const buffer = Buffer.from(await response.arrayBuffer());
358
- // Detect file extension from Content-Type header or URL
359
- const extension = getExtention(response.headers.get("content-type"), image.source.url);
360
- const imagePath = getReferenceImagePath(context, key, extension);
361
- await fs.promises.writeFile(imagePath, buffer);
362
- imageRefs[key] = imagePath;
363
- }
364
- }));
365
- }
366
- return imageRefs;
367
- };
368
291
  const prepareGenerateImages = async (context) => {
369
292
  const fileName = MulmoStudioContextMethods.getFileName(context);
370
293
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -373,56 +296,22 @@ const prepareGenerateImages = async (context) => {
373
296
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
374
297
  const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
375
298
  const imageRefs = await getImageRefs(context);
376
- // Determine movie agent based on provider
377
- const getMovieAgent = () => {
378
- const provider = context.presentationStyle.movieParams?.provider ?? "google";
379
- switch (provider) {
380
- case "replicate":
381
- return "movieReplicateAgent";
382
- case "google":
383
- default:
384
- return "movieGoogleAgent";
385
- }
386
- };
387
299
  GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
388
300
  const injections = {
389
301
  context,
390
302
  htmlImageAgentInfo,
391
303
  movieAgentInfo: {
392
- agent: getMovieAgent(),
304
+ agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
393
305
  },
394
306
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
395
307
  imageRefs,
396
308
  };
397
309
  return injections;
398
310
  };
399
- const getConcurrency = (context) => {
400
- if (context.presentationStyle.movieParams?.provider === "replicate") {
401
- return 4;
402
- }
403
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
404
- if (imageAgentInfo.imageParams.provider === "openai") {
405
- // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
406
- // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
407
- // gpt-image-1:3,000,000 TPM、150 images per minute
408
- return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
409
- }
410
- return 4;
411
- };
412
311
  const generateImages = async (context, settings, callbacks) => {
413
312
  const options = await graphOption(context, settings);
414
313
  const injections = await prepareGenerateImages(context);
415
- const graph = new GraphAI(graph_data, {
416
- ...vanillaAgents,
417
- imageGoogleAgent,
418
- movieGoogleAgent,
419
- movieReplicateAgent,
420
- imageOpenaiAgent,
421
- mediaMockAgent,
422
- fileWriteAgent,
423
- openAIAgent,
424
- anthropicAgent,
425
- }, options);
314
+ const graph = new GraphAI(graph_data, imageAgents, options);
426
315
  Object.keys(injections).forEach((key) => {
427
316
  graph.injectValue(key, injections[key]);
428
317
  });
@@ -434,6 +323,7 @@ const generateImages = async (context, settings, callbacks) => {
434
323
  const res = await graph.run();
435
324
  return res.mergeResult;
436
325
  };
326
+ // public api
437
327
  export const images = async (context, settings, callbacks) => {
438
328
  try {
439
329
  MulmoStudioContextMethods.setSessionState(context, "image", true);
@@ -446,20 +336,12 @@ export const images = async (context, settings, callbacks) => {
446
336
  throw error;
447
337
  }
448
338
  };
449
- export const generateBeatImage = async (index, context, settings, callbacks) => {
339
+ // public api
340
+ export const generateBeatImage = async (inputs) => {
341
+ const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
450
342
  const options = await graphOption(context, settings);
451
343
  const injections = await prepareGenerateImages(context);
452
- const graph = new GraphAI(beat_graph_data, {
453
- ...vanillaAgents,
454
- imageGoogleAgent,
455
- movieGoogleAgent,
456
- movieReplicateAgent,
457
- imageOpenaiAgent,
458
- mediaMockAgent,
459
- fileWriteAgent,
460
- openAIAgent,
461
- anthropicAgent,
462
- }, options);
344
+ const graph = new GraphAI(beat_graph_data, imageAgents, options);
463
345
  Object.keys(injections).forEach((key) => {
464
346
  if ("outputStudioFilePath" !== key) {
465
347
  graph.injectValue(key, injections[key]);
@@ -467,6 +349,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
467
349
  });
468
350
  graph.injectValue("__mapIndex", index);
469
351
  graph.injectValue("beat", context.studio.script.beats[index]);
352
+ graph.injectValue("forceMovie", forceMovie ?? false);
353
+ graph.injectValue("forceImage", forceImage ?? false);
470
354
  if (callbacks) {
471
355
  callbacks.forEach((callback) => {
472
356
  graph.registerCallback(callback);
@@ -1,5 +1,7 @@
1
1
  export * from "./audio.js";
2
2
  export * from "./images.js";
3
+ export * from "./image_references.js";
4
+ export * from "./image_agents.js";
3
5
  export * from "./movie.js";
4
6
  export * from "./pdf.js";
5
7
  export * from "./translate.js";
@@ -1,5 +1,7 @@
1
1
  export * from "./audio.js";
2
2
  export * from "./images.js";
3
+ export * from "./image_references.js";
4
+ export * from "./image_agents.js";
3
5
  export * from "./movie.js";
4
6
  export * from "./pdf.js";
5
7
  export * from "./translate.js";
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
218
218
  // Concatenate the trimmed images
219
219
  const concatVideoId = "concat_video";
220
220
  const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
221
- ffmpegContext.filterComplex.push(`${videoIds.map((id) => `[${id}]`).join("")}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`);
221
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
222
+ const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
223
+ ffmpegContext.filterComplex.push(filter);
222
224
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
223
225
  const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
224
226
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
@@ -1,6 +1,7 @@
1
1
  import fs from "fs";
2
2
  import path from "path";
3
3
  import puppeteer from "puppeteer";
4
+ import { GraphAILogger } from "graphai";
4
5
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
5
6
  import { localizedText, isHttp } from "../utils/utils.js";
6
7
  import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
17
18
  const mimeType = ext === "jpg" ? "jpeg" : ext;
18
19
  return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
19
20
  }
20
- catch (__error) {
21
+ catch (error) {
22
+ GraphAILogger.info("loadImage failed", error);
21
23
  const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
22
24
  return `data:image/png;base64,${placeholderData.toString("base64")}`;
23
25
  }
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
101
103
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
102
104
  const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
103
105
  const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
104
- const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
106
+ const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
107
+ const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
105
108
  const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
106
109
  const template = getHTMLFile(`pdf_${pdfMode}`);
107
110
  const baseTemplateData = {
@@ -1,18 +1,5 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
- export type ImageGoogleConfig = {
3
- projectId?: string;
4
- token?: string;
5
- };
6
- export declare const imageGoogleAgent: AgentFunction<{
7
- model: string;
8
- canvasSize: {
9
- width: number;
10
- height: number;
11
- };
12
- }, {
13
- buffer: Buffer;
14
- }, {
15
- prompt: string;
16
- }, ImageGoogleConfig>;
2
+ import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
3
+ export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
17
4
  declare const imageGoogleAgentInfo: AgentFunctionInfo;
18
5
  export default imageGoogleAgentInfo;
@@ -7,12 +7,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
7
7
  const payload = {
8
8
  instances: [
9
9
  {
10
- prompt: prompt,
10
+ prompt,
11
11
  },
12
12
  ],
13
13
  parameters: {
14
14
  sampleCount: 1,
15
- aspectRatio: aspectRatio,
15
+ aspectRatio,
16
16
  safetySetting: "block_only_high",
17
17
  },
18
18
  };
@@ -51,7 +51,7 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
51
51
  throw error;
52
52
  }
53
53
  }
54
- export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
54
+ export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
55
55
  const { prompt } = namedInputs;
56
56
  const aspectRatio = getAspectRatio(params.canvasSize);
57
57
  const model = params.model ?? "imagen-3.0-fast-generate-001";
@@ -1,20 +1,5 @@
1
1
  import { AgentFunction, AgentFunctionInfo } from "graphai";
2
- type OpenAIModeration = "low" | "auto";
3
- export declare const imageOpenaiAgent: AgentFunction<{
4
- apiKey: string;
5
- model: string;
6
- moderation: OpenAIModeration | null | undefined;
7
- canvasSize: {
8
- width: number;
9
- height: number;
10
- };
11
- }, {
12
- buffer: Buffer;
13
- }, {
14
- prompt: string;
15
- images: string[] | null | undefined;
16
- }, {
17
- apiKey?: string;
18
- }>;
2
+ import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
3
+ export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
19
4
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
20
5
  export default imageOpenaiAgentInfo;
@@ -4,12 +4,12 @@ import { GraphAILogger } from "graphai";
4
4
  import OpenAI, { toFile } from "openai";
5
5
  import { defaultOpenAIImageModel } from "../utils/const.js";
6
6
  // https://platform.openai.com/docs/guides/image-generation
7
- export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
8
- const { prompt, images } = namedInputs;
7
+ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
8
+ const { prompt, referenceImages } = namedInputs;
9
9
  const { moderation, canvasSize } = params;
10
- const { apiKey } = { ...config };
10
+ const { apiKey, baseURL } = { ...config };
11
11
  const model = params.model ?? defaultOpenAIImageModel;
12
- const openai = new OpenAI({ apiKey });
12
+ const openai = new OpenAI({ apiKey, baseURL });
13
13
  const size = (() => {
14
14
  if (model === "gpt-image-1") {
15
15
  if (canvasSize.width > canvasSize.height) {
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
46
46
  const response = await (async () => {
47
47
  try {
48
48
  const targetSize = imageOptions.size;
49
- if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
50
- const imagelist = await Promise.all((images ?? []).map(async (file) => {
49
+ if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
50
+ const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
51
51
  const ext = path.extname(file).toLowerCase();
52
52
  const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
53
53
  return await toFile(fs.createReadStream(file), null, { type });
54
54
  }));
55
- return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
55
+ return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
56
56
  }
57
57
  else {
58
58
  return await openai.images.generate(imageOptions);
@@ -1,24 +1,9 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
- export type MovieGoogleConfig = {
3
- projectId?: string;
4
- token?: string;
5
- };
2
+ import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
6
3
  export declare const getAspectRatio: (canvasSize: {
7
4
  width: number;
8
5
  height: number;
9
6
  }) => string;
10
- export declare const movieGoogleAgent: AgentFunction<{
11
- model: string;
12
- canvasSize: {
13
- width: number;
14
- height: number;
15
- };
16
- duration?: number;
17
- }, {
18
- buffer: Buffer;
19
- }, {
20
- prompt: string;
21
- imagePath?: string;
22
- }, MovieGoogleConfig>;
7
+ export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
23
8
  declare const movieGoogleAgentInfo: AgentFunctionInfo;
24
9
  export default movieGoogleAgentInfo;
@@ -5,13 +5,13 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
5
5
  const payload = {
6
6
  instances: [
7
7
  {
8
- prompt: prompt,
8
+ prompt,
9
9
  image: undefined,
10
10
  },
11
11
  ],
12
12
  parameters: {
13
13
  sampleCount: 1,
14
- aspectRatio: aspectRatio,
14
+ aspectRatio,
15
15
  safetySetting: "block_only_high",
16
16
  personGeneration: "allow_all",
17
17
  durationSeconds: duration,
@@ -46,7 +46,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
46
46
  while (true) {
47
47
  GraphAILogger.info("...waiting for movie generation...");
48
48
  await sleep(3000);
49
- const response = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:fetchPredictOperation`, {
49
+ const operationResponse = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:fetchPredictOperation`, {
50
50
  method: "POST",
51
51
  headers: {
52
52
  Authorization: `Bearer ${token}`,
@@ -54,10 +54,10 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
54
54
  },
55
55
  body: JSON.stringify(fetchBody),
56
56
  });
57
- if (!response.ok) {
58
- throw new Error(`Error: ${response.status} - ${response.statusText}`);
57
+ if (!operationResponse.ok) {
58
+ throw new Error(`Error: ${operationResponse.status} - ${operationResponse.statusText}`);
59
59
  }
60
- const responseData = await response.json();
60
+ const responseData = await operationResponse.json();
61
61
  if (responseData.done) {
62
62
  if (responseData.error) {
63
63
  GraphAILogger.info("Prompt: ", prompt);
@@ -87,7 +87,7 @@ export const getAspectRatio = (canvasSize) => {
87
87
  return "1:1";
88
88
  }
89
89
  };
90
- export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
90
+ export const movieGoogleAgent = async ({ namedInputs, params, config, }) => {
91
91
  const { prompt, imagePath } = namedInputs;
92
92
  const aspectRatio = getAspectRatio(params.canvasSize);
93
93
  const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
@@ -1,23 +1,9 @@
1
1
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentParams, ReplicateMovieAgentConfig } from "../types/agent.js";
2
3
  export declare const getAspectRatio: (canvasSize: {
3
4
  width: number;
4
5
  height: number;
5
6
  }) => string;
6
- export type MovieReplicateConfig = {
7
- apiKey?: string;
8
- };
9
- export declare const movieReplicateAgent: AgentFunction<{
10
- model: `${string}/${string}` | undefined;
11
- canvasSize: {
12
- width: number;
13
- height: number;
14
- };
15
- duration?: number;
16
- }, {
17
- buffer: Buffer;
18
- }, {
19
- prompt: string;
20
- imagePath?: string;
21
- }, MovieReplicateConfig>;
7
+ export declare const movieReplicateAgent: AgentFunction<ReplicateMovieAgentParams, AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentConfig>;
22
8
  declare const movieReplicateAgentInfo: AgentFunctionInfo;
23
9
  export default movieReplicateAgentInfo;