mulmocast 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/characters.json +16 -0
- package/assets/templates/html.json +6 -0
- package/lib/actions/audio.js +13 -19
- package/lib/actions/image_agents.d.ts +145 -0
- package/lib/actions/image_agents.js +59 -0
- package/lib/actions/image_references.d.ts +9 -0
- package/lib/actions/image_references.js +79 -0
- package/lib/actions/images.d.ts +17 -109
- package/lib/actions/images.js +83 -188
- package/lib/actions/index.d.ts +2 -0
- package/lib/actions/index.js +2 -0
- package/lib/actions/movie.js +3 -1
- package/lib/actions/pdf.js +5 -2
- package/lib/agents/image_google_agent.d.ts +2 -15
- package/lib/agents/image_google_agent.js +5 -5
- package/lib/agents/image_openai_agent.d.ts +2 -17
- package/lib/agents/image_openai_agent.js +9 -9
- package/lib/agents/movie_google_agent.d.ts +2 -17
- package/lib/agents/movie_google_agent.js +7 -7
- package/lib/agents/movie_replicate_agent.d.ts +2 -16
- package/lib/agents/movie_replicate_agent.js +4 -4
- package/lib/agents/tts_google_agent.d.ts +9 -1
- package/lib/agents/tts_google_agent.js +2 -2
- package/lib/agents/tts_nijivoice_agent.js +1 -1
- package/lib/agents/tts_openai_agent.d.ts +13 -1
- package/lib/agents/tts_openai_agent.js +2 -2
- package/lib/cli/helpers.js +7 -7
- package/lib/index.d.ts +1 -0
- package/lib/index.js +1 -0
- package/lib/methods/index.d.ts +1 -0
- package/lib/methods/index.js +1 -0
- package/lib/methods/mulmo_beat.d.ts +6 -0
- package/lib/methods/mulmo_beat.js +21 -0
- package/lib/methods/mulmo_presentation_style.d.ts +3 -1
- package/lib/methods/mulmo_presentation_style.js +31 -7
- package/lib/methods/mulmo_studio_context.js +3 -0
- package/lib/tools/story_to_script.js +2 -2
- package/lib/types/agent.d.ts +55 -0
- package/lib/types/agent.js +3 -0
- package/lib/types/schema.d.ts +560 -296
- package/lib/types/schema.js +19 -10
- package/lib/types/type.d.ts +3 -2
- package/lib/utils/const.d.ts +0 -1
- package/lib/utils/const.js +0 -1
- package/lib/utils/context.d.ts +24 -13
- package/lib/utils/context.js +1 -0
- package/lib/utils/ffmpeg_utils.d.ts +1 -1
- package/lib/utils/ffmpeg_utils.js +1 -1
- package/lib/utils/file.js +4 -4
- package/lib/utils/filters.js +3 -4
- package/lib/utils/markdown.js +1 -1
- package/lib/utils/preprocess.d.ts +15 -8
- package/lib/utils/provider2agent.d.ts +72 -0
- package/lib/utils/provider2agent.js +81 -0
- package/lib/utils/string.js +5 -5
- package/lib/utils/utils.d.ts +13 -11
- package/lib/utils/utils.js +56 -62
- package/package.json +7 -6
- package/scripts/templates/html.json +42 -0
- package/scripts/templates/image_refs.json +35 -0
package/lib/actions/images.js
CHANGED
|
@@ -1,86 +1,38 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
4
|
-
import
|
|
4
|
+
import { GoogleAuth } from "google-auth-library";
|
|
5
|
+
import * as vanilla from "@graphai/vanilla";
|
|
5
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
6
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
7
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
8
|
-
import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
|
|
9
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
10
9
|
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
11
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
16
|
-
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
17
|
-
const vanillaAgents = agents.default ?? agents;
|
|
18
|
-
dotenv.config();
|
|
19
|
-
import { GoogleAuth } from "google-auth-library";
|
|
11
|
+
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
|
+
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
+
import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
|
|
20
14
|
import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
15
|
+
import { getImageRefs } from "./image_references.js";
|
|
16
|
+
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
+
const vanillaAgents = vanilla.default ?? vanilla;
|
|
18
|
+
const imageAgents = {
|
|
19
|
+
imageGoogleAgent,
|
|
20
|
+
imageOpenaiAgent,
|
|
26
21
|
};
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
// const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
|
|
31
|
-
const imagePath = getBeatPngImagePath(context, index);
|
|
32
|
-
const returnValue = {
|
|
33
|
-
imageParams: imageAgentInfo.imageParams,
|
|
34
|
-
movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
|
|
35
|
-
};
|
|
36
|
-
if (beat.image) {
|
|
37
|
-
const plugin = findImagePlugin(beat?.image?.type);
|
|
38
|
-
if (!plugin) {
|
|
39
|
-
throw new Error(`invalid beat image type: ${beat.image}`);
|
|
40
|
-
}
|
|
41
|
-
const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
|
|
42
|
-
// undefined prompt indicates that image generation is not needed
|
|
43
|
-
return { imagePath: path, referenceImage: path, ...returnValue };
|
|
44
|
-
}
|
|
45
|
-
if (beat.htmlPrompt) {
|
|
46
|
-
const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
|
|
47
|
-
const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
|
|
48
|
-
return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
|
|
49
|
-
}
|
|
50
|
-
// images for "edit_image"
|
|
51
|
-
const images = (() => {
|
|
52
|
-
const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
|
|
53
|
-
const sources = imageNames.map((name) => imageRefs[name]);
|
|
54
|
-
return sources.filter((source) => source !== undefined);
|
|
55
|
-
})();
|
|
56
|
-
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
57
|
-
return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
|
|
58
|
-
}
|
|
59
|
-
const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
|
|
60
|
-
return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
|
|
61
|
-
};
|
|
62
|
-
export const imagePluginAgent = async (namedInputs) => {
|
|
63
|
-
const { context, beat, index } = namedInputs;
|
|
64
|
-
const imagePath = getBeatPngImagePath(context, index);
|
|
65
|
-
const plugin = findImagePlugin(beat?.image?.type);
|
|
66
|
-
if (!plugin) {
|
|
67
|
-
throw new Error(`invalid beat image type: ${beat.image}`);
|
|
68
|
-
}
|
|
69
|
-
try {
|
|
70
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
|
|
71
|
-
const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
|
|
72
|
-
await plugin.process(processorParams);
|
|
73
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
74
|
-
}
|
|
75
|
-
catch (error) {
|
|
76
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
|
|
77
|
-
throw error;
|
|
78
|
-
}
|
|
22
|
+
const movieAgents = {
|
|
23
|
+
movieGoogleAgent,
|
|
24
|
+
movieReplicateAgent,
|
|
79
25
|
};
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
26
|
+
const defaultAgents = {
|
|
27
|
+
...vanillaAgents,
|
|
28
|
+
...imageAgents,
|
|
29
|
+
...movieAgents,
|
|
30
|
+
mediaMockAgent,
|
|
31
|
+
fileWriteAgent,
|
|
32
|
+
openAIAgent,
|
|
33
|
+
anthropicAgent,
|
|
83
34
|
};
|
|
35
|
+
dotenv.config();
|
|
84
36
|
const beat_graph_data = {
|
|
85
37
|
version: 0.5,
|
|
86
38
|
concurrency: 4,
|
|
@@ -91,6 +43,8 @@ const beat_graph_data = {
|
|
|
91
43
|
imageRefs: {},
|
|
92
44
|
beat: {},
|
|
93
45
|
__mapIndex: {},
|
|
46
|
+
forceMovie: { value: false },
|
|
47
|
+
forceImage: { value: false },
|
|
94
48
|
preprocessor: {
|
|
95
49
|
agent: imagePreprocessAgent,
|
|
96
50
|
inputs: {
|
|
@@ -108,7 +62,7 @@ const beat_graph_data = {
|
|
|
108
62
|
context: ":context",
|
|
109
63
|
beat: ":beat",
|
|
110
64
|
index: ":__mapIndex",
|
|
111
|
-
onComplete: ":preprocessor",
|
|
65
|
+
onComplete: [":preprocessor"],
|
|
112
66
|
},
|
|
113
67
|
},
|
|
114
68
|
htmlImageAgent: {
|
|
@@ -122,10 +76,13 @@ const beat_graph_data = {
|
|
|
122
76
|
model: ":htmlImageAgentInfo.model",
|
|
123
77
|
max_tokens: ":htmlImageAgentInfo.max_tokens",
|
|
124
78
|
},
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
79
|
+
cache: {
|
|
80
|
+
force: [":context.force", ":forceImage"],
|
|
81
|
+
file: ":preprocessor.htmlPath",
|
|
82
|
+
index: ":__mapIndex",
|
|
83
|
+
mulmoContext: ":context",
|
|
84
|
+
sessionType: "html",
|
|
85
|
+
},
|
|
129
86
|
},
|
|
130
87
|
},
|
|
131
88
|
htmlReader: {
|
|
@@ -135,7 +92,7 @@ const beat_graph_data = {
|
|
|
135
92
|
return { html };
|
|
136
93
|
},
|
|
137
94
|
inputs: {
|
|
138
|
-
onComplete: ":htmlImageAgent", // to wait for htmlImageAgent to finish
|
|
95
|
+
onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
|
|
139
96
|
htmlPath: ":preprocessor.htmlPath",
|
|
140
97
|
},
|
|
141
98
|
output: {
|
|
@@ -150,10 +107,7 @@ const beat_graph_data = {
|
|
|
150
107
|
inputs: {
|
|
151
108
|
htmlText: ":htmlReader.htmlText",
|
|
152
109
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
153
|
-
file: ":preprocessor.imagePath",
|
|
154
|
-
mulmoContext: ":context", // for fileCacheAgentFilter
|
|
155
|
-
index: ":__mapIndex", // for fileCacheAgentFilter
|
|
156
|
-
sessionType: "image", // for fileCacheAgentFilter
|
|
110
|
+
file: ":preprocessor.imagePath",
|
|
157
111
|
},
|
|
158
112
|
},
|
|
159
113
|
imageGenerator: {
|
|
@@ -162,12 +116,14 @@ const beat_graph_data = {
|
|
|
162
116
|
retry: 2,
|
|
163
117
|
inputs: {
|
|
164
118
|
prompt: ":preprocessor.prompt",
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
119
|
+
referenceImages: ":preprocessor.referenceImages",
|
|
120
|
+
cache: {
|
|
121
|
+
force: [":context.force", ":forceImage"],
|
|
122
|
+
file: ":preprocessor.imagePath",
|
|
123
|
+
index: ":__mapIndex",
|
|
124
|
+
mulmoContext: ":context",
|
|
125
|
+
sessionType: "image",
|
|
126
|
+
},
|
|
171
127
|
params: {
|
|
172
128
|
model: ":preprocessor.imageParams.model",
|
|
173
129
|
moderation: ":preprocessor.imageParams.moderation",
|
|
@@ -182,14 +138,16 @@ const beat_graph_data = {
|
|
|
182
138
|
inputs: {
|
|
183
139
|
onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
|
|
184
140
|
prompt: ":beat.moviePrompt",
|
|
185
|
-
imagePath: ":preprocessor.
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
141
|
+
imagePath: ":preprocessor.referenceImageForMovie",
|
|
142
|
+
cache: {
|
|
143
|
+
force: [":context.force", ":forceMovie"],
|
|
144
|
+
file: ":preprocessor.movieFile",
|
|
145
|
+
index: ":__mapIndex",
|
|
146
|
+
sessionType: "movie",
|
|
147
|
+
mulmoContext: ":context",
|
|
148
|
+
},
|
|
191
149
|
params: {
|
|
192
|
-
model: ":
|
|
150
|
+
model: ":preprocessor.movieParams.model",
|
|
193
151
|
duration: ":beat.duration",
|
|
194
152
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
195
153
|
},
|
|
@@ -199,15 +157,14 @@ const beat_graph_data = {
|
|
|
199
157
|
imageFromMovie: {
|
|
200
158
|
if: ":preprocessor.imageFromMovie",
|
|
201
159
|
agent: async (namedInputs) => {
|
|
202
|
-
await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
203
|
-
return { generatedImage: true };
|
|
160
|
+
return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
|
|
204
161
|
},
|
|
205
162
|
inputs: {
|
|
206
|
-
onComplete: ":movieGenerator", // to wait for movieGenerator to finish
|
|
163
|
+
onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
|
|
207
164
|
imageFile: ":preprocessor.imagePath",
|
|
208
165
|
movieFile: ":preprocessor.movieFile",
|
|
209
166
|
},
|
|
210
|
-
defaultValue: {
|
|
167
|
+
defaultValue: {},
|
|
211
168
|
},
|
|
212
169
|
output: {
|
|
213
170
|
agent: "copyAgent",
|
|
@@ -307,21 +264,19 @@ const googleAuth = async () => {
|
|
|
307
264
|
throw error;
|
|
308
265
|
}
|
|
309
266
|
};
|
|
310
|
-
const graphOption = async (context, settings) => {
|
|
311
|
-
const agentFilters = [
|
|
312
|
-
{
|
|
313
|
-
name: "fileCacheAgentFilter",
|
|
314
|
-
agent: fileCacheAgentFilter,
|
|
315
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator", "htmlImageAgent"],
|
|
316
|
-
},
|
|
317
|
-
];
|
|
318
|
-
const taskManager = new TaskManager(getConcurrency(context));
|
|
267
|
+
export const graphOption = async (context, settings) => {
|
|
319
268
|
const options = {
|
|
320
|
-
agentFilters
|
|
321
|
-
|
|
269
|
+
agentFilters: [
|
|
270
|
+
{
|
|
271
|
+
name: "fileCacheAgentFilter",
|
|
272
|
+
agent: fileCacheAgentFilter,
|
|
273
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
|
|
274
|
+
},
|
|
275
|
+
],
|
|
276
|
+
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
322
277
|
};
|
|
323
278
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
|
|
324
|
-
const config = settings2GraphAIConfig(settings);
|
|
279
|
+
const config = settings2GraphAIConfig(settings, process.env);
|
|
325
280
|
// We need to get google's auth token only if the google is the text2image provider.
|
|
326
281
|
if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
327
282
|
userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
|
|
@@ -339,32 +294,6 @@ const graphOption = async (context, settings) => {
|
|
|
339
294
|
options.config = config;
|
|
340
295
|
return options;
|
|
341
296
|
};
|
|
342
|
-
// TODO: unit test
|
|
343
|
-
export const getImageRefs = async (context) => {
|
|
344
|
-
const imageRefs = {};
|
|
345
|
-
const images = context.presentationStyle.imageParams?.images;
|
|
346
|
-
if (images) {
|
|
347
|
-
await Promise.all(Object.keys(images).map(async (key) => {
|
|
348
|
-
const image = images[key];
|
|
349
|
-
if (image.source.kind === "path") {
|
|
350
|
-
imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
|
|
351
|
-
}
|
|
352
|
-
else if (image.source.kind === "url") {
|
|
353
|
-
const response = await fetch(image.source.url);
|
|
354
|
-
if (!response.ok) {
|
|
355
|
-
throw new Error(`Failed to download image: ${image.source.url}`);
|
|
356
|
-
}
|
|
357
|
-
const buffer = Buffer.from(await response.arrayBuffer());
|
|
358
|
-
// Detect file extension from Content-Type header or URL
|
|
359
|
-
const extension = getExtention(response.headers.get("content-type"), image.source.url);
|
|
360
|
-
const imagePath = getReferenceImagePath(context, key, extension);
|
|
361
|
-
await fs.promises.writeFile(imagePath, buffer);
|
|
362
|
-
imageRefs[key] = imagePath;
|
|
363
|
-
}
|
|
364
|
-
}));
|
|
365
|
-
}
|
|
366
|
-
return imageRefs;
|
|
367
|
-
};
|
|
368
297
|
const prepareGenerateImages = async (context) => {
|
|
369
298
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
370
299
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
@@ -373,56 +302,26 @@ const prepareGenerateImages = async (context) => {
|
|
|
373
302
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
|
|
374
303
|
const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
|
|
375
304
|
const imageRefs = await getImageRefs(context);
|
|
376
|
-
// Determine movie agent based on provider
|
|
377
|
-
const getMovieAgent = () => {
|
|
378
|
-
const provider = context.presentationStyle.movieParams?.provider ?? "google";
|
|
379
|
-
switch (provider) {
|
|
380
|
-
case "replicate":
|
|
381
|
-
return "movieReplicateAgent";
|
|
382
|
-
case "google":
|
|
383
|
-
default:
|
|
384
|
-
return "movieGoogleAgent";
|
|
385
|
-
}
|
|
386
|
-
};
|
|
387
305
|
GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
|
|
388
306
|
const injections = {
|
|
389
307
|
context,
|
|
390
308
|
htmlImageAgentInfo,
|
|
391
309
|
movieAgentInfo: {
|
|
392
|
-
agent: getMovieAgent(),
|
|
310
|
+
agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
|
|
393
311
|
},
|
|
394
312
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
|
|
395
313
|
imageRefs,
|
|
396
314
|
};
|
|
397
315
|
return injections;
|
|
398
316
|
};
|
|
399
|
-
const
|
|
400
|
-
|
|
401
|
-
return 4;
|
|
402
|
-
}
|
|
403
|
-
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
404
|
-
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
405
|
-
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
406
|
-
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
407
|
-
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
408
|
-
return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
|
|
409
|
-
}
|
|
410
|
-
return 4;
|
|
411
|
-
};
|
|
412
|
-
const generateImages = async (context, settings, callbacks) => {
|
|
413
|
-
const options = await graphOption(context, settings);
|
|
317
|
+
const generateImages = async (context, settings, callbacks, options) => {
|
|
318
|
+
const optionImageAgents = options?.imageAgents ?? {};
|
|
414
319
|
const injections = await prepareGenerateImages(context);
|
|
415
|
-
const
|
|
416
|
-
...
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
imageOpenaiAgent,
|
|
421
|
-
mediaMockAgent,
|
|
422
|
-
fileWriteAgent,
|
|
423
|
-
openAIAgent,
|
|
424
|
-
anthropicAgent,
|
|
425
|
-
}, options);
|
|
320
|
+
const graphaiAgent = {
|
|
321
|
+
...defaultAgents,
|
|
322
|
+
...optionImageAgents,
|
|
323
|
+
};
|
|
324
|
+
const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
|
|
426
325
|
Object.keys(injections).forEach((key) => {
|
|
427
326
|
graph.injectValue(key, injections[key]);
|
|
428
327
|
});
|
|
@@ -434,10 +333,12 @@ const generateImages = async (context, settings, callbacks) => {
|
|
|
434
333
|
const res = await graph.run();
|
|
435
334
|
return res.mergeResult;
|
|
436
335
|
};
|
|
437
|
-
|
|
336
|
+
// public api
|
|
337
|
+
export const images = async (context, args) => {
|
|
338
|
+
const { settings, callbacks, options } = args ?? {};
|
|
438
339
|
try {
|
|
439
340
|
MulmoStudioContextMethods.setSessionState(context, "image", true);
|
|
440
|
-
const newContext = await generateImages(context, settings, callbacks);
|
|
341
|
+
const newContext = await generateImages(context, settings, callbacks, options);
|
|
441
342
|
MulmoStudioContextMethods.setSessionState(context, "image", false);
|
|
442
343
|
return newContext;
|
|
443
344
|
}
|
|
@@ -446,20 +347,12 @@ export const images = async (context, settings, callbacks) => {
|
|
|
446
347
|
throw error;
|
|
447
348
|
}
|
|
448
349
|
};
|
|
449
|
-
|
|
350
|
+
// public api
|
|
351
|
+
export const generateBeatImage = async (inputs) => {
|
|
352
|
+
const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
|
|
450
353
|
const options = await graphOption(context, settings);
|
|
451
354
|
const injections = await prepareGenerateImages(context);
|
|
452
|
-
const graph = new GraphAI(beat_graph_data,
|
|
453
|
-
...vanillaAgents,
|
|
454
|
-
imageGoogleAgent,
|
|
455
|
-
movieGoogleAgent,
|
|
456
|
-
movieReplicateAgent,
|
|
457
|
-
imageOpenaiAgent,
|
|
458
|
-
mediaMockAgent,
|
|
459
|
-
fileWriteAgent,
|
|
460
|
-
openAIAgent,
|
|
461
|
-
anthropicAgent,
|
|
462
|
-
}, options);
|
|
355
|
+
const graph = new GraphAI(beat_graph_data, defaultAgents, options);
|
|
463
356
|
Object.keys(injections).forEach((key) => {
|
|
464
357
|
if ("outputStudioFilePath" !== key) {
|
|
465
358
|
graph.injectValue(key, injections[key]);
|
|
@@ -467,6 +360,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
|
|
|
467
360
|
});
|
|
468
361
|
graph.injectValue("__mapIndex", index);
|
|
469
362
|
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
363
|
+
graph.injectValue("forceMovie", forceMovie ?? false);
|
|
364
|
+
graph.injectValue("forceImage", forceImage ?? false);
|
|
470
365
|
if (callbacks) {
|
|
471
366
|
callbacks.forEach((callback) => {
|
|
472
367
|
graph.registerCallback(callback);
|
package/lib/actions/index.d.ts
CHANGED
package/lib/actions/index.js
CHANGED
package/lib/actions/movie.js
CHANGED
|
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
218
218
|
// Concatenate the trimmed images
|
|
219
219
|
const concatVideoId = "concat_video";
|
|
220
220
|
const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
|
|
221
|
-
|
|
221
|
+
const inputs = videoIds.map((id) => `[${id}]`).join("");
|
|
222
|
+
const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
|
|
223
|
+
ffmpegContext.filterComplex.push(filter);
|
|
222
224
|
const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
|
|
223
225
|
const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
|
|
224
226
|
GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
|
package/lib/actions/pdf.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import puppeteer from "puppeteer";
|
|
4
|
+
import { GraphAILogger } from "graphai";
|
|
4
5
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
5
6
|
import { localizedText, isHttp } from "../utils/utils.js";
|
|
6
7
|
import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
|
|
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
|
|
|
17
18
|
const mimeType = ext === "jpg" ? "jpeg" : ext;
|
|
18
19
|
return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
|
|
19
20
|
}
|
|
20
|
-
catch (
|
|
21
|
+
catch (error) {
|
|
22
|
+
GraphAILogger.info("loadImage failed", error);
|
|
21
23
|
const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
|
|
22
24
|
return `data:image/png;base64,${placeholderData.toString("base64")}`;
|
|
23
25
|
}
|
|
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
|
|
|
101
103
|
const imagePaths = studio.beats.map((beat) => beat.imageFile);
|
|
102
104
|
const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
|
|
103
105
|
const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
|
|
104
|
-
const
|
|
106
|
+
const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
|
|
107
|
+
const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
|
|
105
108
|
const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
|
|
106
109
|
const template = getHTMLFile(`pdf_${pdfMode}`);
|
|
107
110
|
const baseTemplateData = {
|
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
token?: string;
|
|
5
|
-
};
|
|
6
|
-
export declare const imageGoogleAgent: AgentFunction<{
|
|
7
|
-
model: string;
|
|
8
|
-
canvasSize: {
|
|
9
|
-
width: number;
|
|
10
|
-
height: number;
|
|
11
|
-
};
|
|
12
|
-
}, {
|
|
13
|
-
buffer: Buffer;
|
|
14
|
-
}, {
|
|
15
|
-
prompt: string;
|
|
16
|
-
}, ImageGoogleConfig>;
|
|
2
|
+
import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
|
|
17
4
|
declare const imageGoogleAgentInfo: AgentFunctionInfo;
|
|
18
5
|
export default imageGoogleAgentInfo;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
2
|
import { getAspectRatio } from "./movie_google_agent.js";
|
|
3
|
+
import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
3
4
|
async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
4
5
|
const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
|
|
5
6
|
try {
|
|
@@ -7,12 +8,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
7
8
|
const payload = {
|
|
8
9
|
instances: [
|
|
9
10
|
{
|
|
10
|
-
prompt
|
|
11
|
+
prompt,
|
|
11
12
|
},
|
|
12
13
|
],
|
|
13
14
|
parameters: {
|
|
14
15
|
sampleCount: 1,
|
|
15
|
-
aspectRatio
|
|
16
|
+
aspectRatio,
|
|
16
17
|
safetySetting: "block_only_high",
|
|
17
18
|
},
|
|
18
19
|
};
|
|
@@ -51,11 +52,10 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
51
52
|
throw error;
|
|
52
53
|
}
|
|
53
54
|
}
|
|
54
|
-
export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
55
|
+
export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
|
|
55
56
|
const { prompt } = namedInputs;
|
|
56
57
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
57
|
-
const model = params.model ?? "
|
|
58
|
-
//const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
|
|
58
|
+
const model = params.model ?? provider2ImageAgent["google"].defaultModel;
|
|
59
59
|
const projectId = config?.projectId;
|
|
60
60
|
const token = config?.token;
|
|
61
61
|
try {
|
|
@@ -1,20 +1,5 @@
|
|
|
1
1
|
import { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
type
|
|
3
|
-
export declare const imageOpenaiAgent: AgentFunction<
|
|
4
|
-
apiKey: string;
|
|
5
|
-
model: string;
|
|
6
|
-
moderation: OpenAIModeration | null | undefined;
|
|
7
|
-
canvasSize: {
|
|
8
|
-
width: number;
|
|
9
|
-
height: number;
|
|
10
|
-
};
|
|
11
|
-
}, {
|
|
12
|
-
buffer: Buffer;
|
|
13
|
-
}, {
|
|
14
|
-
prompt: string;
|
|
15
|
-
images: string[] | null | undefined;
|
|
16
|
-
}, {
|
|
17
|
-
apiKey?: string;
|
|
18
|
-
}>;
|
|
2
|
+
import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
|
|
19
4
|
declare const imageOpenaiAgentInfo: AgentFunctionInfo;
|
|
20
5
|
export default imageOpenaiAgentInfo;
|
|
@@ -2,14 +2,14 @@ import fs from "fs";
|
|
|
2
2
|
import path from "path";
|
|
3
3
|
import { GraphAILogger } from "graphai";
|
|
4
4
|
import OpenAI, { toFile } from "openai";
|
|
5
|
-
import {
|
|
5
|
+
import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
6
6
|
// https://platform.openai.com/docs/guides/image-generation
|
|
7
|
-
export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
8
|
-
const { prompt,
|
|
7
|
+
export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
|
|
8
|
+
const { prompt, referenceImages } = namedInputs;
|
|
9
9
|
const { moderation, canvasSize } = params;
|
|
10
|
-
const { apiKey } = { ...config };
|
|
11
|
-
const model = params.model ??
|
|
12
|
-
const openai = new OpenAI({ apiKey });
|
|
10
|
+
const { apiKey, baseURL } = { ...config };
|
|
11
|
+
const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
|
|
12
|
+
const openai = new OpenAI({ apiKey, baseURL });
|
|
13
13
|
const size = (() => {
|
|
14
14
|
if (model === "gpt-image-1") {
|
|
15
15
|
if (canvasSize.width > canvasSize.height) {
|
|
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
|
46
46
|
const response = await (async () => {
|
|
47
47
|
try {
|
|
48
48
|
const targetSize = imageOptions.size;
|
|
49
|
-
if ((
|
|
50
|
-
const
|
|
49
|
+
if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
|
|
50
|
+
const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
|
|
51
51
|
const ext = path.extname(file).toLowerCase();
|
|
52
52
|
const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
|
|
53
53
|
return await toFile(fs.createReadStream(file), null, { type });
|
|
54
54
|
}));
|
|
55
|
-
return await openai.images.edit({ ...imageOptions, size: targetSize, image:
|
|
55
|
+
return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
|
|
56
56
|
}
|
|
57
57
|
else {
|
|
58
58
|
return await openai.images.generate(imageOptions);
|
|
@@ -1,24 +1,9 @@
|
|
|
1
1
|
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
|
|
3
|
-
projectId?: string;
|
|
4
|
-
token?: string;
|
|
5
|
-
};
|
|
2
|
+
import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
|
|
6
3
|
export declare const getAspectRatio: (canvasSize: {
|
|
7
4
|
width: number;
|
|
8
5
|
height: number;
|
|
9
6
|
}) => string;
|
|
10
|
-
export declare const movieGoogleAgent: AgentFunction<
|
|
11
|
-
model: string;
|
|
12
|
-
canvasSize: {
|
|
13
|
-
width: number;
|
|
14
|
-
height: number;
|
|
15
|
-
};
|
|
16
|
-
duration?: number;
|
|
17
|
-
}, {
|
|
18
|
-
buffer: Buffer;
|
|
19
|
-
}, {
|
|
20
|
-
prompt: string;
|
|
21
|
-
imagePath?: string;
|
|
22
|
-
}, MovieGoogleConfig>;
|
|
7
|
+
export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
|
|
23
8
|
declare const movieGoogleAgentInfo: AgentFunctionInfo;
|
|
24
9
|
export default movieGoogleAgentInfo;
|