mulmocast 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/characters.json +16 -0
- package/assets/templates/html.json +6 -0
- package/lib/actions/audio.js +8 -6
- package/lib/actions/image_agents.d.ts +121 -0
- package/lib/actions/image_agents.js +56 -0
- package/lib/actions/image_references.d.ts +9 -0
- package/lib/actions/image_references.js +79 -0
- package/lib/actions/images.d.ts +9 -109
- package/lib/actions/images.js +68 -184
- package/lib/actions/index.d.ts +2 -0
- package/lib/actions/index.js +2 -0
- package/lib/actions/movie.js +3 -1
- package/lib/actions/pdf.js +5 -2
- package/lib/agents/image_google_agent.d.ts +2 -15
- package/lib/agents/image_google_agent.js +3 -3
- package/lib/agents/image_openai_agent.d.ts +2 -17
- package/lib/agents/image_openai_agent.js +7 -7
- package/lib/agents/movie_google_agent.d.ts +2 -17
- package/lib/agents/movie_google_agent.js +7 -7
- package/lib/agents/movie_replicate_agent.d.ts +2 -16
- package/lib/agents/movie_replicate_agent.js +3 -3
- package/lib/agents/tts_google_agent.d.ts +9 -1
- package/lib/agents/tts_google_agent.js +2 -2
- package/lib/agents/tts_nijivoice_agent.js +1 -1
- package/lib/agents/tts_openai_agent.d.ts +13 -1
- package/lib/agents/tts_openai_agent.js +2 -2
- package/lib/cli/helpers.js +7 -7
- package/lib/methods/index.d.ts +1 -0
- package/lib/methods/index.js +1 -0
- package/lib/methods/mulmo_beat.d.ts +6 -0
- package/lib/methods/mulmo_beat.js +21 -0
- package/lib/methods/mulmo_presentation_style.d.ts +2 -0
- package/lib/methods/mulmo_presentation_style.js +24 -0
- package/lib/methods/mulmo_studio_context.js +3 -0
- package/lib/tools/story_to_script.js +2 -2
- package/lib/types/agent.d.ts +55 -0
- package/lib/types/agent.js +3 -0
- package/lib/types/schema.d.ts +317 -74
- package/lib/types/schema.js +9 -2
- package/lib/types/type.d.ts +3 -2
- package/lib/utils/context.d.ts +12 -2
- package/lib/utils/context.js +1 -0
- package/lib/utils/ffmpeg_utils.d.ts +1 -1
- package/lib/utils/ffmpeg_utils.js +1 -1
- package/lib/utils/file.js +4 -4
- package/lib/utils/filters.js +3 -4
- package/lib/utils/markdown.js +1 -1
- package/lib/utils/preprocess.d.ts +8 -2
- package/lib/utils/string.js +5 -5
- package/lib/utils/utils.d.ts +8 -1
- package/lib/utils/utils.js +51 -36
- package/package.json +7 -6
- package/scripts/templates/html.json +42 -0
- package/scripts/templates/image_refs.json +35 -0
package/lib/actions/images.js
CHANGED
|
@@ -1,86 +1,32 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
4
|
+
import { GoogleAuth } from "google-auth-library";
|
|
4
5
|
import * as agents from "@graphai/vanilla";
|
|
5
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
6
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
7
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
8
|
-
import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
|
|
9
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
10
9
|
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
11
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
16
|
-
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
17
|
-
const vanillaAgents = agents.default ?? agents;
|
|
18
|
-
dotenv.config();
|
|
19
|
-
import { GoogleAuth } from "google-auth-library";
|
|
11
|
+
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
|
+
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
+
import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
|
|
20
14
|
import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
|
|
35
|
-
};
|
|
36
|
-
if (beat.image) {
|
|
37
|
-
const plugin = findImagePlugin(beat?.image?.type);
|
|
38
|
-
if (!plugin) {
|
|
39
|
-
throw new Error(`invalid beat image type: ${beat.image}`);
|
|
40
|
-
}
|
|
41
|
-
const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
|
|
42
|
-
// undefined prompt indicates that image generation is not needed
|
|
43
|
-
return { imagePath: path, referenceImage: path, ...returnValue };
|
|
44
|
-
}
|
|
45
|
-
if (beat.htmlPrompt) {
|
|
46
|
-
const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
|
|
47
|
-
const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
|
|
48
|
-
return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
|
|
49
|
-
}
|
|
50
|
-
// images for "edit_image"
|
|
51
|
-
const images = (() => {
|
|
52
|
-
const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
|
|
53
|
-
const sources = imageNames.map((name) => imageRefs[name]);
|
|
54
|
-
return sources.filter((source) => source !== undefined);
|
|
55
|
-
})();
|
|
56
|
-
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
57
|
-
return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
|
|
58
|
-
}
|
|
59
|
-
const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
|
|
60
|
-
return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
|
|
61
|
-
};
|
|
62
|
-
export const imagePluginAgent = async (namedInputs) => {
|
|
63
|
-
const { context, beat, index } = namedInputs;
|
|
64
|
-
const imagePath = getBeatPngImagePath(context, index);
|
|
65
|
-
const plugin = findImagePlugin(beat?.image?.type);
|
|
66
|
-
if (!plugin) {
|
|
67
|
-
throw new Error(`invalid beat image type: ${beat.image}`);
|
|
68
|
-
}
|
|
69
|
-
try {
|
|
70
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
|
|
71
|
-
const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
|
|
72
|
-
await plugin.process(processorParams);
|
|
73
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
74
|
-
}
|
|
75
|
-
catch (error) {
|
|
76
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
77
|
-
throw error;
|
|
78
|
-
}
|
|
79
|
-
};
|
|
80
|
-
const htmlImageGeneratorAgent = async (namedInputs) => {
|
|
81
|
-
const { file, canvasSize, htmlText } = namedInputs;
|
|
82
|
-
await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
|
|
15
|
+
import { getImageRefs } from "./image_references.js";
|
|
16
|
+
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
+
const vanillaAgents = agents.default ?? agents;
|
|
18
|
+
const imageAgents = {
|
|
19
|
+
...vanillaAgents,
|
|
20
|
+
imageGoogleAgent,
|
|
21
|
+
movieGoogleAgent,
|
|
22
|
+
movieReplicateAgent,
|
|
23
|
+
imageOpenaiAgent,
|
|
24
|
+
mediaMockAgent,
|
|
25
|
+
fileWriteAgent,
|
|
26
|
+
openAIAgent,
|
|
27
|
+
anthropicAgent,
|
|
83
28
|
};
|
|
29
|
+
dotenv.config();
|
|
84
30
|
const beat_graph_data = {
|
|
85
31
|
version: 0.5,
|
|
86
32
|
concurrency: 4,
|
|
@@ -91,6 +37,8 @@ const beat_graph_data = {
|
|
|
91
37
|
imageRefs: {},
|
|
92
38
|
beat: {},
|
|
93
39
|
__mapIndex: {},
|
|
40
|
+
forceMovie: { value: false },
|
|
41
|
+
forceImage: { value: false },
|
|
94
42
|
preprocessor: {
|
|
95
43
|
agent: imagePreprocessAgent,
|
|
96
44
|
inputs: {
|
|
@@ -108,7 +56,7 @@ const beat_graph_data = {
|
|
|
108
56
|
context: ":context",
|
|
109
57
|
beat: ":beat",
|
|
110
58
|
index: ":__mapIndex",
|
|
111
|
-
onComplete: ":preprocessor",
|
|
59
|
+
onComplete: [":preprocessor"],
|
|
112
60
|
},
|
|
113
61
|
},
|
|
114
62
|
htmlImageAgent: {
|
|
@@ -122,10 +70,13 @@ const beat_graph_data = {
|
|
|
122
70
|
model: ":htmlImageAgentInfo.model",
|
|
123
71
|
max_tokens: ":htmlImageAgentInfo.max_tokens",
|
|
124
72
|
},
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
73
|
+
cache: {
|
|
74
|
+
force: [":context.force", ":forceImage"],
|
|
75
|
+
file: ":preprocessor.htmlPath",
|
|
76
|
+
index: ":__mapIndex",
|
|
77
|
+
mulmoContext: ":context",
|
|
78
|
+
sessionType: "html",
|
|
79
|
+
},
|
|
129
80
|
},
|
|
130
81
|
},
|
|
131
82
|
htmlReader: {
|
|
@@ -135,7 +86,7 @@ const beat_graph_data = {
|
|
|
135
86
|
return { html };
|
|
136
87
|
},
|
|
137
88
|
inputs: {
|
|
138
|
-
onComplete: ":htmlImageAgent", // to wait for htmlImageAgent to finish
|
|
89
|
+
onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
|
|
139
90
|
htmlPath: ":preprocessor.htmlPath",
|
|
140
91
|
},
|
|
141
92
|
output: {
|
|
@@ -150,10 +101,7 @@ const beat_graph_data = {
|
|
|
150
101
|
inputs: {
|
|
151
102
|
htmlText: ":htmlReader.htmlText",
|
|
152
103
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
153
|
-
file: ":preprocessor.imagePath",
|
|
154
|
-
mulmoContext: ":context", // for fileCacheAgentFilter
|
|
155
|
-
index: ":__mapIndex", // for fileCacheAgentFilter
|
|
156
|
-
sessionType: "image", // for fileCacheAgentFilter
|
|
104
|
+
file: ":preprocessor.imagePath",
|
|
157
105
|
},
|
|
158
106
|
},
|
|
159
107
|
imageGenerator: {
|
|
@@ -162,12 +110,14 @@ const beat_graph_data = {
|
|
|
162
110
|
retry: 2,
|
|
163
111
|
inputs: {
|
|
164
112
|
prompt: ":preprocessor.prompt",
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
113
|
+
referenceImages: ":preprocessor.referenceImages",
|
|
114
|
+
cache: {
|
|
115
|
+
force: [":context.force", ":forceImage"],
|
|
116
|
+
file: ":preprocessor.imagePath",
|
|
117
|
+
index: ":__mapIndex",
|
|
118
|
+
mulmoContext: ":context",
|
|
119
|
+
sessionType: "image",
|
|
120
|
+
},
|
|
171
121
|
params: {
|
|
172
122
|
model: ":preprocessor.imageParams.model",
|
|
173
123
|
moderation: ":preprocessor.imageParams.moderation",
|
|
@@ -182,12 +132,14 @@ const beat_graph_data = {
|
|
|
182
132
|
inputs: {
|
|
183
133
|
onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
|
|
184
134
|
prompt: ":beat.moviePrompt",
|
|
185
|
-
imagePath: ":preprocessor.
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
135
|
+
imagePath: ":preprocessor.referenceImageForMovie",
|
|
136
|
+
cache: {
|
|
137
|
+
force: [":context.force", ":forceMovie"],
|
|
138
|
+
file: ":preprocessor.movieFile",
|
|
139
|
+
index: ":__mapIndex",
|
|
140
|
+
sessionType: "movie",
|
|
141
|
+
mulmoContext: ":context",
|
|
142
|
+
},
|
|
191
143
|
params: {
|
|
192
144
|
model: ":context.presentationStyle.movieParams.model",
|
|
193
145
|
duration: ":beat.duration",
|
|
@@ -199,15 +151,14 @@ const beat_graph_data = {
|
|
|
199
151
|
imageFromMovie: {
|
|
200
152
|
if: ":preprocessor.imageFromMovie",
|
|
201
153
|
agent: async (namedInputs) => {
|
|
202
|
-
await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
203
|
-
return { generatedImage: true };
|
|
154
|
+
return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
204
155
|
},
|
|
205
156
|
inputs: {
|
|
206
|
-
onComplete: ":movieGenerator", // to wait for movieGenerator to finish
|
|
157
|
+
onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
|
|
207
158
|
imageFile: ":preprocessor.imagePath",
|
|
208
159
|
movieFile: ":preprocessor.movieFile",
|
|
209
160
|
},
|
|
210
|
-
defaultValue: {
|
|
161
|
+
defaultValue: {},
|
|
211
162
|
},
|
|
212
163
|
output: {
|
|
213
164
|
agent: "copyAgent",
|
|
@@ -307,21 +258,19 @@ const googleAuth = async () => {
|
|
|
307
258
|
throw error;
|
|
308
259
|
}
|
|
309
260
|
};
|
|
310
|
-
const graphOption = async (context, settings) => {
|
|
311
|
-
const agentFilters = [
|
|
312
|
-
{
|
|
313
|
-
name: "fileCacheAgentFilter",
|
|
314
|
-
agent: fileCacheAgentFilter,
|
|
315
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator", "htmlImageAgent"],
|
|
316
|
-
},
|
|
317
|
-
];
|
|
318
|
-
const taskManager = new TaskManager(getConcurrency(context));
|
|
261
|
+
export const graphOption = async (context, settings) => {
|
|
319
262
|
const options = {
|
|
320
|
-
agentFilters
|
|
321
|
-
|
|
263
|
+
agentFilters: [
|
|
264
|
+
{
|
|
265
|
+
name: "fileCacheAgentFilter",
|
|
266
|
+
agent: fileCacheAgentFilter,
|
|
267
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
|
|
268
|
+
},
|
|
269
|
+
],
|
|
270
|
+
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
322
271
|
};
|
|
323
272
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
|
|
324
|
-
const config = settings2GraphAIConfig(settings);
|
|
273
|
+
const config = settings2GraphAIConfig(settings, process.env);
|
|
325
274
|
// We need to get google's auth token only if the google is the text2image provider.
|
|
326
275
|
if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
327
276
|
userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
|
|
@@ -339,32 +288,6 @@ const graphOption = async (context, settings) => {
|
|
|
339
288
|
options.config = config;
|
|
340
289
|
return options;
|
|
341
290
|
};
|
|
342
|
-
// TODO: unit test
|
|
343
|
-
export const getImageRefs = async (context) => {
|
|
344
|
-
const imageRefs = {};
|
|
345
|
-
const images = context.presentationStyle.imageParams?.images;
|
|
346
|
-
if (images) {
|
|
347
|
-
await Promise.all(Object.keys(images).map(async (key) => {
|
|
348
|
-
const image = images[key];
|
|
349
|
-
if (image.source.kind === "path") {
|
|
350
|
-
imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
|
|
351
|
-
}
|
|
352
|
-
else if (image.source.kind === "url") {
|
|
353
|
-
const response = await fetch(image.source.url);
|
|
354
|
-
if (!response.ok) {
|
|
355
|
-
throw new Error(`Failed to download image: ${image.source.url}`);
|
|
356
|
-
}
|
|
357
|
-
const buffer = Buffer.from(await response.arrayBuffer());
|
|
358
|
-
// Detect file extension from Content-Type header or URL
|
|
359
|
-
const extension = getExtention(response.headers.get("content-type"), image.source.url);
|
|
360
|
-
const imagePath = getReferenceImagePath(context, key, extension);
|
|
361
|
-
await fs.promises.writeFile(imagePath, buffer);
|
|
362
|
-
imageRefs[key] = imagePath;
|
|
363
|
-
}
|
|
364
|
-
}));
|
|
365
|
-
}
|
|
366
|
-
return imageRefs;
|
|
367
|
-
};
|
|
368
291
|
const prepareGenerateImages = async (context) => {
|
|
369
292
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
370
293
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
@@ -373,56 +296,22 @@ const prepareGenerateImages = async (context) => {
|
|
|
373
296
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
|
|
374
297
|
const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
|
|
375
298
|
const imageRefs = await getImageRefs(context);
|
|
376
|
-
// Determine movie agent based on provider
|
|
377
|
-
const getMovieAgent = () => {
|
|
378
|
-
const provider = context.presentationStyle.movieParams?.provider ?? "google";
|
|
379
|
-
switch (provider) {
|
|
380
|
-
case "replicate":
|
|
381
|
-
return "movieReplicateAgent";
|
|
382
|
-
case "google":
|
|
383
|
-
default:
|
|
384
|
-
return "movieGoogleAgent";
|
|
385
|
-
}
|
|
386
|
-
};
|
|
387
299
|
GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
|
|
388
300
|
const injections = {
|
|
389
301
|
context,
|
|
390
302
|
htmlImageAgentInfo,
|
|
391
303
|
movieAgentInfo: {
|
|
392
|
-
agent: getMovieAgent(),
|
|
304
|
+
agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
|
|
393
305
|
},
|
|
394
306
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
|
|
395
307
|
imageRefs,
|
|
396
308
|
};
|
|
397
309
|
return injections;
|
|
398
310
|
};
|
|
399
|
-
const getConcurrency = (context) => {
|
|
400
|
-
if (context.presentationStyle.movieParams?.provider === "replicate") {
|
|
401
|
-
return 4;
|
|
402
|
-
}
|
|
403
|
-
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
404
|
-
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
405
|
-
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
406
|
-
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
407
|
-
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
408
|
-
return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
|
|
409
|
-
}
|
|
410
|
-
return 4;
|
|
411
|
-
};
|
|
412
311
|
const generateImages = async (context, settings, callbacks) => {
|
|
413
312
|
const options = await graphOption(context, settings);
|
|
414
313
|
const injections = await prepareGenerateImages(context);
|
|
415
|
-
const graph = new GraphAI(graph_data,
|
|
416
|
-
...vanillaAgents,
|
|
417
|
-
imageGoogleAgent,
|
|
418
|
-
movieGoogleAgent,
|
|
419
|
-
movieReplicateAgent,
|
|
420
|
-
imageOpenaiAgent,
|
|
421
|
-
mediaMockAgent,
|
|
422
|
-
fileWriteAgent,
|
|
423
|
-
openAIAgent,
|
|
424
|
-
anthropicAgent,
|
|
425
|
-
}, options);
|
|
314
|
+
const graph = new GraphAI(graph_data, imageAgents, options);
|
|
426
315
|
Object.keys(injections).forEach((key) => {
|
|
427
316
|
graph.injectValue(key, injections[key]);
|
|
428
317
|
});
|
|
@@ -434,6 +323,7 @@ const generateImages = async (context, settings, callbacks) => {
|
|
|
434
323
|
const res = await graph.run();
|
|
435
324
|
return res.mergeResult;
|
|
436
325
|
};
|
|
326
|
+
// public api
|
|
437
327
|
export const images = async (context, settings, callbacks) => {
|
|
438
328
|
try {
|
|
439
329
|
MulmoStudioContextMethods.setSessionState(context, "image", true);
|
|
@@ -446,20 +336,12 @@ export const images = async (context, settings, callbacks) => {
|
|
|
446
336
|
throw error;
|
|
447
337
|
}
|
|
448
338
|
};
|
|
449
|
-
|
|
339
|
+
// public api
|
|
340
|
+
export const generateBeatImage = async (inputs) => {
|
|
341
|
+
const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
|
|
450
342
|
const options = await graphOption(context, settings);
|
|
451
343
|
const injections = await prepareGenerateImages(context);
|
|
452
|
-
const graph = new GraphAI(beat_graph_data,
|
|
453
|
-
...vanillaAgents,
|
|
454
|
-
imageGoogleAgent,
|
|
455
|
-
movieGoogleAgent,
|
|
456
|
-
movieReplicateAgent,
|
|
457
|
-
imageOpenaiAgent,
|
|
458
|
-
mediaMockAgent,
|
|
459
|
-
fileWriteAgent,
|
|
460
|
-
openAIAgent,
|
|
461
|
-
anthropicAgent,
|
|
462
|
-
}, options);
|
|
344
|
+
const graph = new GraphAI(beat_graph_data, imageAgents, options);
|
|
463
345
|
Object.keys(injections).forEach((key) => {
|
|
464
346
|
if ("outputStudioFilePath" !== key) {
|
|
465
347
|
graph.injectValue(key, injections[key]);
|
|
@@ -467,6 +349,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
|
|
|
467
349
|
});
|
|
468
350
|
graph.injectValue("__mapIndex", index);
|
|
469
351
|
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
352
|
+
graph.injectValue("forceMovie", forceMovie ?? false);
|
|
353
|
+
graph.injectValue("forceImage", forceImage ?? false);
|
|
470
354
|
if (callbacks) {
|
|
471
355
|
callbacks.forEach((callback) => {
|
|
472
356
|
graph.registerCallback(callback);
|
package/lib/actions/index.d.ts
CHANGED
package/lib/actions/index.js
CHANGED
package/lib/actions/movie.js
CHANGED
|
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
218
218
|
// Concatenate the trimmed images
|
|
219
219
|
const concatVideoId = "concat_video";
|
|
220
220
|
const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
|
|
221
|
-
|
|
221
|
+
const inputs = videoIds.map((id) => `[${id}]`).join("");
|
|
222
|
+
const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
|
|
223
|
+
ffmpegContext.filterComplex.push(filter);
|
|
222
224
|
const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
|
|
223
225
|
const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
|
|
224
226
|
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
package/lib/actions/pdf.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import puppeteer from "puppeteer";
|
|
4
|
+
import { GraphAILogger } from "graphai";
|
|
4
5
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
5
6
|
import { localizedText, isHttp } from "../utils/utils.js";
|
|
6
7
|
import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
|
|
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
|
|
|
17
18
|
const mimeType = ext === "jpg" ? "jpeg" : ext;
|
|
18
19
|
return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
|
|
19
20
|
}
|
|
20
|
-
catch (
|
|
21
|
+
catch (error) {
|
|
22
|
+
GraphAILogger.info("loadImage failed", error);
|
|
21
23
|
const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
|
|
22
24
|
return `data:image/png;base64,${placeholderData.toString("base64")}`;
|
|
23
25
|
}
|
|
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
|
|
|
101
103
|
const imagePaths = studio.beats.map((beat) => beat.imageFile);
|
|
102
104
|
const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
|
|
103
105
|
const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
|
|
104
|
-
const
|
|
106
|
+
const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
|
|
107
|
+
const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
|
|
105
108
|
const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
|
|
106
109
|
const template = getHTMLFile(`pdf_${pdfMode}`);
|
|
107
110
|
const baseTemplateData = {
|
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
token?: string;
|
|
5
|
-
};
|
|
6
|
-
export declare const imageGoogleAgent: AgentFunction<{
|
|
7
|
-
model: string;
|
|
8
|
-
canvasSize: {
|
|
9
|
-
width: number;
|
|
10
|
-
height: number;
|
|
11
|
-
};
|
|
12
|
-
}, {
|
|
13
|
-
buffer: Buffer;
|
|
14
|
-
}, {
|
|
15
|
-
prompt: string;
|
|
16
|
-
}, ImageGoogleConfig>;
|
|
2
|
+
import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
|
|
17
4
|
declare const imageGoogleAgentInfo: AgentFunctionInfo;
|
|
18
5
|
export default imageGoogleAgentInfo;
|
|
@@ -7,12 +7,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
7
7
|
const payload = {
|
|
8
8
|
instances: [
|
|
9
9
|
{
|
|
10
|
-
prompt
|
|
10
|
+
prompt,
|
|
11
11
|
},
|
|
12
12
|
],
|
|
13
13
|
parameters: {
|
|
14
14
|
sampleCount: 1,
|
|
15
|
-
aspectRatio
|
|
15
|
+
aspectRatio,
|
|
16
16
|
safetySetting: "block_only_high",
|
|
17
17
|
},
|
|
18
18
|
};
|
|
@@ -51,7 +51,7 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
51
51
|
throw error;
|
|
52
52
|
}
|
|
53
53
|
}
|
|
54
|
-
export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
54
|
+
export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
|
|
55
55
|
const { prompt } = namedInputs;
|
|
56
56
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
57
57
|
const model = params.model ?? "imagen-3.0-fast-generate-001";
|
|
@@ -1,20 +1,5 @@
|
|
|
1
1
|
import { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
type
|
|
3
|
-
export declare const imageOpenaiAgent: AgentFunction<
|
|
4
|
-
apiKey: string;
|
|
5
|
-
model: string;
|
|
6
|
-
moderation: OpenAIModeration | null | undefined;
|
|
7
|
-
canvasSize: {
|
|
8
|
-
width: number;
|
|
9
|
-
height: number;
|
|
10
|
-
};
|
|
11
|
-
}, {
|
|
12
|
-
buffer: Buffer;
|
|
13
|
-
}, {
|
|
14
|
-
prompt: string;
|
|
15
|
-
images: string[] | null | undefined;
|
|
16
|
-
}, {
|
|
17
|
-
apiKey?: string;
|
|
18
|
-
}>;
|
|
2
|
+
import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
|
|
19
4
|
declare const imageOpenaiAgentInfo: AgentFunctionInfo;
|
|
20
5
|
export default imageOpenaiAgentInfo;
|
|
@@ -4,12 +4,12 @@ import { GraphAILogger } from "graphai";
|
|
|
4
4
|
import OpenAI, { toFile } from "openai";
|
|
5
5
|
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
6
6
|
// https://platform.openai.com/docs/guides/image-generation
|
|
7
|
-
export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
8
|
-
const { prompt,
|
|
7
|
+
export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
|
|
8
|
+
const { prompt, referenceImages } = namedInputs;
|
|
9
9
|
const { moderation, canvasSize } = params;
|
|
10
|
-
const { apiKey } = { ...config };
|
|
10
|
+
const { apiKey, baseURL } = { ...config };
|
|
11
11
|
const model = params.model ?? defaultOpenAIImageModel;
|
|
12
|
-
const openai = new OpenAI({ apiKey });
|
|
12
|
+
const openai = new OpenAI({ apiKey, baseURL });
|
|
13
13
|
const size = (() => {
|
|
14
14
|
if (model === "gpt-image-1") {
|
|
15
15
|
if (canvasSize.width > canvasSize.height) {
|
|
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
|
46
46
|
const response = await (async () => {
|
|
47
47
|
try {
|
|
48
48
|
const targetSize = imageOptions.size;
|
|
49
|
-
if ((
|
|
50
|
-
const
|
|
49
|
+
if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
|
|
50
|
+
const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
|
|
51
51
|
const ext = path.extname(file).toLowerCase();
|
|
52
52
|
const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
|
|
53
53
|
return await toFile(fs.createReadStream(file), null, { type });
|
|
54
54
|
}));
|
|
55
|
-
return await openai.images.edit({ ...imageOptions, size: targetSize, image:
|
|
55
|
+
return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
|
|
56
56
|
}
|
|
57
57
|
else {
|
|
58
58
|
return await openai.images.generate(imageOptions);
|
|
@@ -1,24 +1,9 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
|
|
3
|
-
projectId?: string;
|
|
4
|
-
token?: string;
|
|
5
|
-
};
|
|
2
|
+
import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
|
|
6
3
|
export declare const getAspectRatio: (canvasSize: {
|
|
7
4
|
width: number;
|
|
8
5
|
height: number;
|
|
9
6
|
}) => string;
|
|
10
|
-
export declare const movieGoogleAgent: AgentFunction<
|
|
11
|
-
model: string;
|
|
12
|
-
canvasSize: {
|
|
13
|
-
width: number;
|
|
14
|
-
height: number;
|
|
15
|
-
};
|
|
16
|
-
duration?: number;
|
|
17
|
-
}, {
|
|
18
|
-
buffer: Buffer;
|
|
19
|
-
}, {
|
|
20
|
-
prompt: string;
|
|
21
|
-
imagePath?: string;
|
|
22
|
-
}, MovieGoogleConfig>;
|
|
7
|
+
export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
|
|
23
8
|
declare const movieGoogleAgentInfo: AgentFunctionInfo;
|
|
24
9
|
export default movieGoogleAgentInfo;
|
|
@@ -5,13 +5,13 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
5
5
|
const payload = {
|
|
6
6
|
instances: [
|
|
7
7
|
{
|
|
8
|
-
prompt
|
|
8
|
+
prompt,
|
|
9
9
|
image: undefined,
|
|
10
10
|
},
|
|
11
11
|
],
|
|
12
12
|
parameters: {
|
|
13
13
|
sampleCount: 1,
|
|
14
|
-
aspectRatio
|
|
14
|
+
aspectRatio,
|
|
15
15
|
safetySetting: "block_only_high",
|
|
16
16
|
personGeneration: "allow_all",
|
|
17
17
|
durationSeconds: duration,
|
|
@@ -46,7 +46,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
46
46
|
while (true) {
|
|
47
47
|
GraphAILogger.info("...waiting for movie generation...");
|
|
48
48
|
await sleep(3000);
|
|
49
|
-
const
|
|
49
|
+
const operationResponse = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:fetchPredictOperation`, {
|
|
50
50
|
method: "POST",
|
|
51
51
|
headers: {
|
|
52
52
|
Authorization: `Bearer ${token}`,
|
|
@@ -54,10 +54,10 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
54
54
|
},
|
|
55
55
|
body: JSON.stringify(fetchBody),
|
|
56
56
|
});
|
|
57
|
-
if (!
|
|
58
|
-
throw new Error(`Error: ${
|
|
57
|
+
if (!operationResponse.ok) {
|
|
58
|
+
throw new Error(`Error: ${operationResponse.status} - ${operationResponse.statusText}`);
|
|
59
59
|
}
|
|
60
|
-
const responseData = await
|
|
60
|
+
const responseData = await operationResponse.json();
|
|
61
61
|
if (responseData.done) {
|
|
62
62
|
if (responseData.error) {
|
|
63
63
|
GraphAILogger.info("Prompt: ", prompt);
|
|
@@ -87,7 +87,7 @@ export const getAspectRatio = (canvasSize) => {
|
|
|
87
87
|
return "1:1";
|
|
88
88
|
}
|
|
89
89
|
};
|
|
90
|
-
export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
90
|
+
export const movieGoogleAgent = async ({ namedInputs, params, config, }) => {
|
|
91
91
|
const { prompt, imagePath } = namedInputs;
|
|
92
92
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
93
93
|
const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
|
|
@@ -1,23 +1,9 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentParams, ReplicateMovieAgentConfig } from "../types/agent.js";
|
|
2
3
|
export declare const getAspectRatio: (canvasSize: {
|
|
3
4
|
width: number;
|
|
4
5
|
height: number;
|
|
5
6
|
}) => string;
|
|
6
|
-
export
|
|
7
|
-
apiKey?: string;
|
|
8
|
-
};
|
|
9
|
-
export declare const movieReplicateAgent: AgentFunction<{
|
|
10
|
-
model: `${string}/${string}` | undefined;
|
|
11
|
-
canvasSize: {
|
|
12
|
-
width: number;
|
|
13
|
-
height: number;
|
|
14
|
-
};
|
|
15
|
-
duration?: number;
|
|
16
|
-
}, {
|
|
17
|
-
buffer: Buffer;
|
|
18
|
-
}, {
|
|
19
|
-
prompt: string;
|
|
20
|
-
imagePath?: string;
|
|
21
|
-
}, MovieReplicateConfig>;
|
|
7
|
+
export declare const movieReplicateAgent: AgentFunction<ReplicateMovieAgentParams, AgentBufferResult, MovieAgentInputs, ReplicateMovieAgentConfig>;
|
|
22
8
|
declare const movieReplicateAgentInfo: AgentFunctionInfo;
|
|
23
9
|
export default movieReplicateAgentInfo;
|