mulmocast 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/characters.json +16 -0
- package/assets/templates/html.json +6 -0
- package/lib/actions/audio.js +8 -6
- package/lib/actions/image_agents.d.ts +121 -0
- package/lib/actions/image_agents.js +56 -0
- package/lib/actions/image_references.d.ts +9 -0
- package/lib/actions/image_references.js +79 -0
- package/lib/actions/images.d.ts +9 -105
- package/lib/actions/images.js +83 -182
- package/lib/actions/index.d.ts +2 -0
- package/lib/actions/index.js +2 -0
- package/lib/actions/movie.js +3 -1
- package/lib/actions/pdf.js +5 -2
- package/lib/agents/image_google_agent.d.ts +2 -15
- package/lib/agents/image_google_agent.js +3 -3
- package/lib/agents/image_openai_agent.d.ts +2 -17
- package/lib/agents/image_openai_agent.js +7 -7
- package/lib/agents/movie_google_agent.d.ts +2 -17
- package/lib/agents/movie_google_agent.js +7 -7
- package/lib/agents/movie_replicate_agent.d.ts +2 -16
- package/lib/agents/movie_replicate_agent.js +3 -3
- package/lib/agents/tts_google_agent.d.ts +9 -1
- package/lib/agents/tts_google_agent.js +2 -2
- package/lib/agents/tts_nijivoice_agent.js +1 -1
- package/lib/agents/tts_openai_agent.d.ts +13 -1
- package/lib/agents/tts_openai_agent.js +2 -2
- package/lib/cli/helpers.js +7 -7
- package/lib/methods/index.d.ts +1 -0
- package/lib/methods/index.js +1 -0
- package/lib/methods/mulmo_beat.d.ts +6 -0
- package/lib/methods/mulmo_beat.js +21 -0
- package/lib/methods/mulmo_presentation_style.d.ts +2 -0
- package/lib/methods/mulmo_presentation_style.js +24 -0
- package/lib/methods/mulmo_studio_context.js +3 -0
- package/lib/tools/story_to_script.js +2 -2
- package/lib/types/agent.d.ts +55 -0
- package/lib/types/agent.js +3 -0
- package/lib/types/schema.d.ts +322 -74
- package/lib/types/schema.js +10 -2
- package/lib/types/type.d.ts +3 -2
- package/lib/utils/context.d.ts +13 -2
- package/lib/utils/context.js +2 -0
- package/lib/utils/ffmpeg_utils.d.ts +1 -1
- package/lib/utils/ffmpeg_utils.js +1 -1
- package/lib/utils/file.js +4 -4
- package/lib/utils/filters.js +11 -7
- package/lib/utils/markdown.js +1 -1
- package/lib/utils/preprocess.d.ts +8 -2
- package/lib/utils/string.js +5 -5
- package/lib/utils/utils.d.ts +8 -1
- package/lib/utils/utils.js +51 -36
- package/package.json +10 -9
- package/scripts/templates/html.json +42 -0
- package/scripts/templates/image_refs.json +35 -0
package/lib/actions/images.js
CHANGED
|
@@ -1,88 +1,32 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
4
|
+
import { GoogleAuth } from "google-auth-library";
|
|
4
5
|
import * as agents from "@graphai/vanilla";
|
|
5
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
6
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
7
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
8
|
-
import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
|
|
9
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
10
9
|
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
11
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
16
|
-
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
17
|
-
const vanillaAgents = agents.default ?? agents;
|
|
18
|
-
dotenv.config();
|
|
19
|
-
import { GoogleAuth } from "google-auth-library";
|
|
11
|
+
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
|
+
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
+
import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
|
|
20
14
|
import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
|
|
35
|
-
};
|
|
36
|
-
if (beat.image) {
|
|
37
|
-
const plugin = findImagePlugin(beat?.image?.type);
|
|
38
|
-
if (!plugin) {
|
|
39
|
-
throw new Error(`invalid beat image type: ${beat.image}`);
|
|
40
|
-
}
|
|
41
|
-
const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
|
|
42
|
-
// undefined prompt indicates that image generation is not needed
|
|
43
|
-
return { imagePath: path, referenceImage: path, ...returnValue };
|
|
44
|
-
}
|
|
45
|
-
if (beat.htmlPrompt) {
|
|
46
|
-
const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
|
|
47
|
-
return { imagePath, htmlPrompt, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
|
|
48
|
-
}
|
|
49
|
-
// images for "edit_image"
|
|
50
|
-
const images = (() => {
|
|
51
|
-
const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
|
|
52
|
-
const sources = imageNames.map((name) => imageRefs[name]);
|
|
53
|
-
return sources.filter((source) => source !== undefined);
|
|
54
|
-
})();
|
|
55
|
-
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
56
|
-
return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
|
|
57
|
-
}
|
|
58
|
-
const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
|
|
59
|
-
return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
|
|
60
|
-
};
|
|
61
|
-
export const imagePluginAgent = async (namedInputs) => {
|
|
62
|
-
const { context, beat, index } = namedInputs;
|
|
63
|
-
const imagePath = getBeatPngImagePath(context, index);
|
|
64
|
-
const plugin = findImagePlugin(beat?.image?.type);
|
|
65
|
-
if (!plugin) {
|
|
66
|
-
throw new Error(`invalid beat image type: ${beat.image}`);
|
|
67
|
-
}
|
|
68
|
-
try {
|
|
69
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
|
|
70
|
-
const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
|
|
71
|
-
await plugin.process(processorParams);
|
|
72
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
73
|
-
}
|
|
74
|
-
catch (error) {
|
|
75
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
76
|
-
throw error;
|
|
77
|
-
}
|
|
78
|
-
};
|
|
79
|
-
const htmlImageGeneratorAgent = async (namedInputs) => {
|
|
80
|
-
const { html, file, canvasSize } = namedInputs;
|
|
81
|
-
// Save HTML file
|
|
82
|
-
const htmlFile = file.replace(/\.[^/.]+$/, ".html");
|
|
83
|
-
await fs.promises.writeFile(htmlFile, html, "utf8");
|
|
84
|
-
await renderHTMLToImage(html, file, canvasSize.width, canvasSize.height);
|
|
15
|
+
import { getImageRefs } from "./image_references.js";
|
|
16
|
+
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
+
const vanillaAgents = agents.default ?? agents;
|
|
18
|
+
const imageAgents = {
|
|
19
|
+
...vanillaAgents,
|
|
20
|
+
imageGoogleAgent,
|
|
21
|
+
movieGoogleAgent,
|
|
22
|
+
movieReplicateAgent,
|
|
23
|
+
imageOpenaiAgent,
|
|
24
|
+
mediaMockAgent,
|
|
25
|
+
fileWriteAgent,
|
|
26
|
+
openAIAgent,
|
|
27
|
+
anthropicAgent,
|
|
85
28
|
};
|
|
29
|
+
dotenv.config();
|
|
86
30
|
const beat_graph_data = {
|
|
87
31
|
version: 0.5,
|
|
88
32
|
concurrency: 4,
|
|
@@ -93,6 +37,8 @@ const beat_graph_data = {
|
|
|
93
37
|
imageRefs: {},
|
|
94
38
|
beat: {},
|
|
95
39
|
__mapIndex: {},
|
|
40
|
+
forceMovie: { value: false },
|
|
41
|
+
forceImage: { value: false },
|
|
96
42
|
preprocessor: {
|
|
97
43
|
agent: imagePreprocessAgent,
|
|
98
44
|
inputs: {
|
|
@@ -110,7 +56,7 @@ const beat_graph_data = {
|
|
|
110
56
|
context: ":context",
|
|
111
57
|
beat: ":beat",
|
|
112
58
|
index: ":__mapIndex",
|
|
113
|
-
onComplete: ":preprocessor",
|
|
59
|
+
onComplete: [":preprocessor"],
|
|
114
60
|
},
|
|
115
61
|
},
|
|
116
62
|
htmlImageAgent: {
|
|
@@ -124,19 +70,38 @@ const beat_graph_data = {
|
|
|
124
70
|
model: ":htmlImageAgentInfo.model",
|
|
125
71
|
max_tokens: ":htmlImageAgentInfo.max_tokens",
|
|
126
72
|
},
|
|
73
|
+
cache: {
|
|
74
|
+
force: [":context.force", ":forceImage"],
|
|
75
|
+
file: ":preprocessor.htmlPath",
|
|
76
|
+
index: ":__mapIndex",
|
|
77
|
+
mulmoContext: ":context",
|
|
78
|
+
sessionType: "html",
|
|
79
|
+
},
|
|
127
80
|
},
|
|
128
81
|
},
|
|
82
|
+
htmlReader: {
|
|
83
|
+
if: ":preprocessor.htmlPrompt",
|
|
84
|
+
agent: async (namedInputs) => {
|
|
85
|
+
const html = await fs.promises.readFile(namedInputs.htmlPath, "utf8");
|
|
86
|
+
return { html };
|
|
87
|
+
},
|
|
88
|
+
inputs: {
|
|
89
|
+
onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
|
|
90
|
+
htmlPath: ":preprocessor.htmlPath",
|
|
91
|
+
},
|
|
92
|
+
output: {
|
|
93
|
+
htmlText: ".html.codeBlockOrRaw()",
|
|
94
|
+
},
|
|
95
|
+
defaultValue: {},
|
|
96
|
+
},
|
|
129
97
|
htmlImageGenerator: {
|
|
130
98
|
if: ":preprocessor.htmlPrompt",
|
|
131
99
|
defaultValue: {},
|
|
132
100
|
agent: htmlImageGeneratorAgent,
|
|
133
101
|
inputs: {
|
|
134
|
-
|
|
102
|
+
htmlText: ":htmlReader.htmlText",
|
|
135
103
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
136
|
-
file: ":preprocessor.imagePath",
|
|
137
|
-
mulmoContext: ":context", // for fileCacheAgentFilter
|
|
138
|
-
index: ":__mapIndex", // for fileCacheAgentFilter
|
|
139
|
-
sessionType: "image", // for fileCacheAgentFilter
|
|
104
|
+
file: ":preprocessor.imagePath",
|
|
140
105
|
},
|
|
141
106
|
},
|
|
142
107
|
imageGenerator: {
|
|
@@ -145,12 +110,14 @@ const beat_graph_data = {
|
|
|
145
110
|
retry: 2,
|
|
146
111
|
inputs: {
|
|
147
112
|
prompt: ":preprocessor.prompt",
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
113
|
+
referenceImages: ":preprocessor.referenceImages",
|
|
114
|
+
cache: {
|
|
115
|
+
force: [":context.force", ":forceImage"],
|
|
116
|
+
file: ":preprocessor.imagePath",
|
|
117
|
+
index: ":__mapIndex",
|
|
118
|
+
mulmoContext: ":context",
|
|
119
|
+
sessionType: "image",
|
|
120
|
+
},
|
|
154
121
|
params: {
|
|
155
122
|
model: ":preprocessor.imageParams.model",
|
|
156
123
|
moderation: ":preprocessor.imageParams.moderation",
|
|
@@ -165,12 +132,14 @@ const beat_graph_data = {
|
|
|
165
132
|
inputs: {
|
|
166
133
|
onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
|
|
167
134
|
prompt: ":beat.moviePrompt",
|
|
168
|
-
imagePath: ":preprocessor.
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
135
|
+
imagePath: ":preprocessor.referenceImageForMovie",
|
|
136
|
+
cache: {
|
|
137
|
+
force: [":context.force", ":forceMovie"],
|
|
138
|
+
file: ":preprocessor.movieFile",
|
|
139
|
+
index: ":__mapIndex",
|
|
140
|
+
sessionType: "movie",
|
|
141
|
+
mulmoContext: ":context",
|
|
142
|
+
},
|
|
174
143
|
params: {
|
|
175
144
|
model: ":context.presentationStyle.movieParams.model",
|
|
176
145
|
duration: ":beat.duration",
|
|
@@ -182,15 +151,14 @@ const beat_graph_data = {
|
|
|
182
151
|
imageFromMovie: {
|
|
183
152
|
if: ":preprocessor.imageFromMovie",
|
|
184
153
|
agent: async (namedInputs) => {
|
|
185
|
-
await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
186
|
-
return { generatedImage: true };
|
|
154
|
+
return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
187
155
|
},
|
|
188
156
|
inputs: {
|
|
189
|
-
onComplete: ":movieGenerator", // to wait for movieGenerator to finish
|
|
157
|
+
onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
|
|
190
158
|
imageFile: ":preprocessor.imagePath",
|
|
191
159
|
movieFile: ":preprocessor.movieFile",
|
|
192
160
|
},
|
|
193
|
-
defaultValue: {
|
|
161
|
+
defaultValue: {},
|
|
194
162
|
},
|
|
195
163
|
output: {
|
|
196
164
|
agent: "copyAgent",
|
|
@@ -290,21 +258,19 @@ const googleAuth = async () => {
|
|
|
290
258
|
throw error;
|
|
291
259
|
}
|
|
292
260
|
};
|
|
293
|
-
const graphOption = async (context, settings) => {
|
|
294
|
-
const agentFilters = [
|
|
295
|
-
{
|
|
296
|
-
name: "fileCacheAgentFilter",
|
|
297
|
-
agent: fileCacheAgentFilter,
|
|
298
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator"],
|
|
299
|
-
},
|
|
300
|
-
];
|
|
301
|
-
const taskManager = new TaskManager(getConcurrency(context));
|
|
261
|
+
export const graphOption = async (context, settings) => {
|
|
302
262
|
const options = {
|
|
303
|
-
agentFilters
|
|
304
|
-
|
|
263
|
+
agentFilters: [
|
|
264
|
+
{
|
|
265
|
+
name: "fileCacheAgentFilter",
|
|
266
|
+
agent: fileCacheAgentFilter,
|
|
267
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
|
|
268
|
+
},
|
|
269
|
+
],
|
|
270
|
+
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
305
271
|
};
|
|
306
272
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
|
|
307
|
-
const config = settings2GraphAIConfig(settings);
|
|
273
|
+
const config = settings2GraphAIConfig(settings, process.env);
|
|
308
274
|
// We need to get google's auth token only if the google is the text2image provider.
|
|
309
275
|
if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
310
276
|
userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
|
|
@@ -322,32 +288,6 @@ const graphOption = async (context, settings) => {
|
|
|
322
288
|
options.config = config;
|
|
323
289
|
return options;
|
|
324
290
|
};
|
|
325
|
-
// TODO: unit test
|
|
326
|
-
export const getImageRefs = async (context) => {
|
|
327
|
-
const imageRefs = {};
|
|
328
|
-
const images = context.presentationStyle.imageParams?.images;
|
|
329
|
-
if (images) {
|
|
330
|
-
await Promise.all(Object.keys(images).map(async (key) => {
|
|
331
|
-
const image = images[key];
|
|
332
|
-
if (image.source.kind === "path") {
|
|
333
|
-
imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
|
|
334
|
-
}
|
|
335
|
-
else if (image.source.kind === "url") {
|
|
336
|
-
const response = await fetch(image.source.url);
|
|
337
|
-
if (!response.ok) {
|
|
338
|
-
throw new Error(`Failed to download image: ${image.source.url}`);
|
|
339
|
-
}
|
|
340
|
-
const buffer = Buffer.from(await response.arrayBuffer());
|
|
341
|
-
// Detect file extension from Content-Type header or URL
|
|
342
|
-
const extension = getExtention(response.headers.get("content-type"), image.source.url);
|
|
343
|
-
const imagePath = getReferenceImagePath(context, key, extension);
|
|
344
|
-
await fs.promises.writeFile(imagePath, buffer);
|
|
345
|
-
imageRefs[key] = imagePath;
|
|
346
|
-
}
|
|
347
|
-
}));
|
|
348
|
-
}
|
|
349
|
-
return imageRefs;
|
|
350
|
-
};
|
|
351
291
|
const prepareGenerateImages = async (context) => {
|
|
352
292
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
353
293
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
@@ -356,56 +296,22 @@ const prepareGenerateImages = async (context) => {
|
|
|
356
296
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
|
|
357
297
|
const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
|
|
358
298
|
const imageRefs = await getImageRefs(context);
|
|
359
|
-
// Determine movie agent based on provider
|
|
360
|
-
const getMovieAgent = () => {
|
|
361
|
-
const provider = context.presentationStyle.movieParams?.provider ?? "google";
|
|
362
|
-
switch (provider) {
|
|
363
|
-
case "replicate":
|
|
364
|
-
return "movieReplicateAgent";
|
|
365
|
-
case "google":
|
|
366
|
-
default:
|
|
367
|
-
return "movieGoogleAgent";
|
|
368
|
-
}
|
|
369
|
-
};
|
|
370
299
|
GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
|
|
371
300
|
const injections = {
|
|
372
301
|
context,
|
|
373
302
|
htmlImageAgentInfo,
|
|
374
303
|
movieAgentInfo: {
|
|
375
|
-
agent: getMovieAgent(),
|
|
304
|
+
agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
|
|
376
305
|
},
|
|
377
306
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
|
|
378
307
|
imageRefs,
|
|
379
308
|
};
|
|
380
309
|
return injections;
|
|
381
310
|
};
|
|
382
|
-
const getConcurrency = (context) => {
|
|
383
|
-
if (context.presentationStyle.movieParams?.provider === "replicate") {
|
|
384
|
-
return 4;
|
|
385
|
-
}
|
|
386
|
-
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
387
|
-
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
388
|
-
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
389
|
-
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
390
|
-
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
391
|
-
return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
|
|
392
|
-
}
|
|
393
|
-
return 4;
|
|
394
|
-
};
|
|
395
311
|
const generateImages = async (context, settings, callbacks) => {
|
|
396
312
|
const options = await graphOption(context, settings);
|
|
397
313
|
const injections = await prepareGenerateImages(context);
|
|
398
|
-
const graph = new GraphAI(graph_data,
|
|
399
|
-
...vanillaAgents,
|
|
400
|
-
imageGoogleAgent,
|
|
401
|
-
movieGoogleAgent,
|
|
402
|
-
movieReplicateAgent,
|
|
403
|
-
imageOpenaiAgent,
|
|
404
|
-
mediaMockAgent,
|
|
405
|
-
fileWriteAgent,
|
|
406
|
-
openAIAgent,
|
|
407
|
-
anthropicAgent,
|
|
408
|
-
}, options);
|
|
314
|
+
const graph = new GraphAI(graph_data, imageAgents, options);
|
|
409
315
|
Object.keys(injections).forEach((key) => {
|
|
410
316
|
graph.injectValue(key, injections[key]);
|
|
411
317
|
});
|
|
@@ -417,6 +323,7 @@ const generateImages = async (context, settings, callbacks) => {
|
|
|
417
323
|
const res = await graph.run();
|
|
418
324
|
return res.mergeResult;
|
|
419
325
|
};
|
|
326
|
+
// public api
|
|
420
327
|
export const images = async (context, settings, callbacks) => {
|
|
421
328
|
try {
|
|
422
329
|
MulmoStudioContextMethods.setSessionState(context, "image", true);
|
|
@@ -429,20 +336,12 @@ export const images = async (context, settings, callbacks) => {
|
|
|
429
336
|
throw error;
|
|
430
337
|
}
|
|
431
338
|
};
|
|
432
|
-
|
|
339
|
+
// public api
|
|
340
|
+
export const generateBeatImage = async (inputs) => {
|
|
341
|
+
const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
|
|
433
342
|
const options = await graphOption(context, settings);
|
|
434
343
|
const injections = await prepareGenerateImages(context);
|
|
435
|
-
const graph = new GraphAI(beat_graph_data,
|
|
436
|
-
...vanillaAgents,
|
|
437
|
-
imageGoogleAgent,
|
|
438
|
-
movieGoogleAgent,
|
|
439
|
-
movieReplicateAgent,
|
|
440
|
-
imageOpenaiAgent,
|
|
441
|
-
mediaMockAgent,
|
|
442
|
-
fileWriteAgent,
|
|
443
|
-
openAIAgent,
|
|
444
|
-
anthropicAgent,
|
|
445
|
-
}, options);
|
|
344
|
+
const graph = new GraphAI(beat_graph_data, imageAgents, options);
|
|
446
345
|
Object.keys(injections).forEach((key) => {
|
|
447
346
|
if ("outputStudioFilePath" !== key) {
|
|
448
347
|
graph.injectValue(key, injections[key]);
|
|
@@ -450,6 +349,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
|
|
|
450
349
|
});
|
|
451
350
|
graph.injectValue("__mapIndex", index);
|
|
452
351
|
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
352
|
+
graph.injectValue("forceMovie", forceMovie ?? false);
|
|
353
|
+
graph.injectValue("forceImage", forceImage ?? false);
|
|
453
354
|
if (callbacks) {
|
|
454
355
|
callbacks.forEach((callback) => {
|
|
455
356
|
graph.registerCallback(callback);
|
package/lib/actions/index.d.ts
CHANGED
package/lib/actions/index.js
CHANGED
package/lib/actions/movie.js
CHANGED
|
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
218
218
|
// Concatenate the trimmed images
|
|
219
219
|
const concatVideoId = "concat_video";
|
|
220
220
|
const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
|
|
221
|
-
|
|
221
|
+
const inputs = videoIds.map((id) => `[${id}]`).join("");
|
|
222
|
+
const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
|
|
223
|
+
ffmpegContext.filterComplex.push(filter);
|
|
222
224
|
const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
|
|
223
225
|
const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
|
|
224
226
|
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
package/lib/actions/pdf.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import puppeteer from "puppeteer";
|
|
4
|
+
import { GraphAILogger } from "graphai";
|
|
4
5
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
5
6
|
import { localizedText, isHttp } from "../utils/utils.js";
|
|
6
7
|
import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
|
|
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
|
|
|
17
18
|
const mimeType = ext === "jpg" ? "jpeg" : ext;
|
|
18
19
|
return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
|
|
19
20
|
}
|
|
20
|
-
catch (
|
|
21
|
+
catch (error) {
|
|
22
|
+
GraphAILogger.info("loadImage failed", error);
|
|
21
23
|
const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
|
|
22
24
|
return `data:image/png;base64,${placeholderData.toString("base64")}`;
|
|
23
25
|
}
|
|
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
|
|
|
101
103
|
const imagePaths = studio.beats.map((beat) => beat.imageFile);
|
|
102
104
|
const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
|
|
103
105
|
const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
|
|
104
|
-
const
|
|
106
|
+
const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
|
|
107
|
+
const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
|
|
105
108
|
const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
|
|
106
109
|
const template = getHTMLFile(`pdf_${pdfMode}`);
|
|
107
110
|
const baseTemplateData = {
|
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
token?: string;
|
|
5
|
-
};
|
|
6
|
-
export declare const imageGoogleAgent: AgentFunction<{
|
|
7
|
-
model: string;
|
|
8
|
-
canvasSize: {
|
|
9
|
-
width: number;
|
|
10
|
-
height: number;
|
|
11
|
-
};
|
|
12
|
-
}, {
|
|
13
|
-
buffer: Buffer;
|
|
14
|
-
}, {
|
|
15
|
-
prompt: string;
|
|
16
|
-
}, ImageGoogleConfig>;
|
|
2
|
+
import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
|
|
17
4
|
declare const imageGoogleAgentInfo: AgentFunctionInfo;
|
|
18
5
|
export default imageGoogleAgentInfo;
|
|
@@ -7,12 +7,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
7
7
|
const payload = {
|
|
8
8
|
instances: [
|
|
9
9
|
{
|
|
10
|
-
prompt
|
|
10
|
+
prompt,
|
|
11
11
|
},
|
|
12
12
|
],
|
|
13
13
|
parameters: {
|
|
14
14
|
sampleCount: 1,
|
|
15
|
-
aspectRatio
|
|
15
|
+
aspectRatio,
|
|
16
16
|
safetySetting: "block_only_high",
|
|
17
17
|
},
|
|
18
18
|
};
|
|
@@ -51,7 +51,7 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
51
51
|
throw error;
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
|
-
export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
54
|
+
export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
|
|
55
55
|
const { prompt } = namedInputs;
|
|
56
56
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
57
57
|
const model = params.model ?? "imagen-3.0-fast-generate-001";
|
|
@@ -1,20 +1,5 @@
|
|
|
1
1
|
import { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
type
|
|
3
|
-
export declare const imageOpenaiAgent: AgentFunction<
|
|
4
|
-
apiKey: string;
|
|
5
|
-
model: string;
|
|
6
|
-
moderation: OpenAIModeration | null | undefined;
|
|
7
|
-
canvasSize: {
|
|
8
|
-
width: number;
|
|
9
|
-
height: number;
|
|
10
|
-
};
|
|
11
|
-
}, {
|
|
12
|
-
buffer: Buffer;
|
|
13
|
-
}, {
|
|
14
|
-
prompt: string;
|
|
15
|
-
images: string[] | null | undefined;
|
|
16
|
-
}, {
|
|
17
|
-
apiKey?: string;
|
|
18
|
-
}>;
|
|
2
|
+
import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
|
|
19
4
|
declare const imageOpenaiAgentInfo: AgentFunctionInfo;
|
|
20
5
|
export default imageOpenaiAgentInfo;
|
|
@@ -4,12 +4,12 @@ import { GraphAILogger } from "graphai";
|
|
|
4
4
|
import OpenAI, { toFile } from "openai";
|
|
5
5
|
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
6
6
|
// https://platform.openai.com/docs/guides/image-generation
|
|
7
|
-
export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
8
|
-
const { prompt,
|
|
7
|
+
export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
|
|
8
|
+
const { prompt, referenceImages } = namedInputs;
|
|
9
9
|
const { moderation, canvasSize } = params;
|
|
10
|
-
const { apiKey } = { ...config };
|
|
10
|
+
const { apiKey, baseURL } = { ...config };
|
|
11
11
|
const model = params.model ?? defaultOpenAIImageModel;
|
|
12
|
-
const openai = new OpenAI({ apiKey });
|
|
12
|
+
const openai = new OpenAI({ apiKey, baseURL });
|
|
13
13
|
const size = (() => {
|
|
14
14
|
if (model === "gpt-image-1") {
|
|
15
15
|
if (canvasSize.width > canvasSize.height) {
|
|
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
|
46
46
|
const response = await (async () => {
|
|
47
47
|
try {
|
|
48
48
|
const targetSize = imageOptions.size;
|
|
49
|
-
if ((
|
|
50
|
-
const
|
|
49
|
+
if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
|
|
50
|
+
const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
|
|
51
51
|
const ext = path.extname(file).toLowerCase();
|
|
52
52
|
const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
|
|
53
53
|
return await toFile(fs.createReadStream(file), null, { type });
|
|
54
54
|
}));
|
|
55
|
-
return await openai.images.edit({ ...imageOptions, size: targetSize, image:
|
|
55
|
+
return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
|
|
56
56
|
}
|
|
57
57
|
else {
|
|
58
58
|
return await openai.images.generate(imageOptions);
|
|
@@ -1,24 +1,9 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
|
|
3
|
-
projectId?: string;
|
|
4
|
-
token?: string;
|
|
5
|
-
};
|
|
2
|
+
import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
|
|
6
3
|
export declare const getAspectRatio: (canvasSize: {
|
|
7
4
|
width: number;
|
|
8
5
|
height: number;
|
|
9
6
|
}) => string;
|
|
10
|
-
export declare const movieGoogleAgent: AgentFunction<
|
|
11
|
-
model: string;
|
|
12
|
-
canvasSize: {
|
|
13
|
-
width: number;
|
|
14
|
-
height: number;
|
|
15
|
-
};
|
|
16
|
-
duration?: number;
|
|
17
|
-
}, {
|
|
18
|
-
buffer: Buffer;
|
|
19
|
-
}, {
|
|
20
|
-
prompt: string;
|
|
21
|
-
imagePath?: string;
|
|
22
|
-
}, MovieGoogleConfig>;
|
|
7
|
+
export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
|
|
23
8
|
declare const movieGoogleAgentInfo: AgentFunctionInfo;
|
|
24
9
|
export default movieGoogleAgentInfo;
|
|
@@ -5,13 +5,13 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
5
5
|
const payload = {
|
|
6
6
|
instances: [
|
|
7
7
|
{
|
|
8
|
-
prompt
|
|
8
|
+
prompt,
|
|
9
9
|
image: undefined,
|
|
10
10
|
},
|
|
11
11
|
],
|
|
12
12
|
parameters: {
|
|
13
13
|
sampleCount: 1,
|
|
14
|
-
aspectRatio
|
|
14
|
+
aspectRatio,
|
|
15
15
|
safetySetting: "block_only_high",
|
|
16
16
|
personGeneration: "allow_all",
|
|
17
17
|
durationSeconds: duration,
|
|
@@ -46,7 +46,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
46
46
|
while (true) {
|
|
47
47
|
GraphAILogger.info("...waiting for movie generation...");
|
|
48
48
|
await sleep(3000);
|
|
49
|
-
const
|
|
49
|
+
const operationResponse = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:fetchPredictOperation`, {
|
|
50
50
|
method: "POST",
|
|
51
51
|
headers: {
|
|
52
52
|
Authorization: `Bearer ${token}`,
|
|
@@ -54,10 +54,10 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
54
54
|
},
|
|
55
55
|
body: JSON.stringify(fetchBody),
|
|
56
56
|
});
|
|
57
|
-
if (!
|
|
58
|
-
throw new Error(`Error: ${
|
|
57
|
+
if (!operationResponse.ok) {
|
|
58
|
+
throw new Error(`Error: ${operationResponse.status} - ${operationResponse.statusText}`);
|
|
59
59
|
}
|
|
60
|
-
const responseData = await
|
|
60
|
+
const responseData = await operationResponse.json();
|
|
61
61
|
if (responseData.done) {
|
|
62
62
|
if (responseData.error) {
|
|
63
63
|
GraphAILogger.info("Prompt: ", prompt);
|
|
@@ -87,7 +87,7 @@ export const getAspectRatio = (canvasSize) => {
|
|
|
87
87
|
return "1:1";
|
|
88
88
|
}
|
|
89
89
|
};
|
|
90
|
-
export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
90
|
+
export const movieGoogleAgent = async ({ namedInputs, params, config, }) => {
|
|
91
91
|
const { prompt, imagePath } = namedInputs;
|
|
92
92
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
93
93
|
const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
|
|
@@ -1,23 +1,9 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentParams, ReplicateMovieAgentConfig } from "../types/agent.js";
|
|
2
3
|
export declare const getAspectRatio: (canvasSize: {
|
|
3
4
|
width: number;
|
|
4
5
|
height: number;
|
|
5
6
|
}) => string;
|
|
6
|
-
export
|
|
7
|
-
apiKey?: string;
|
|
8
|
-
};
|
|
9
|
-
export declare const movieReplicateAgent: AgentFunction<{
|
|
10
|
-
model: `${string}/${string}` | undefined;
|
|
11
|
-
canvasSize: {
|
|
12
|
-
width: number;
|
|
13
|
-
height: number;
|
|
14
|
-
};
|
|
15
|
-
duration?: number;
|
|
16
|
-
}, {
|
|
17
|
-
buffer: Buffer;
|
|
18
|
-
}, {
|
|
19
|
-
prompt: string;
|
|
20
|
-
imagePath?: string;
|
|
21
|
-
}, MovieReplicateConfig>;
|
|
7
|
+
export declare const movieReplicateAgent: AgentFunction<ReplicateMovieAgentParams, AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentConfig>;
|
|
22
8
|
declare const movieReplicateAgentInfo: AgentFunctionInfo;
|
|
23
9
|
export default movieReplicateAgentInfo;
|