mulmocast 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -3
- package/assets/templates/shorts.json +18 -0
- package/lib/actions/audio.d.ts +2 -1
- package/lib/actions/audio.js +6 -1
- package/lib/actions/images.d.ts +2 -1
- package/lib/actions/images.js +43 -50
- package/lib/actions/movie.js +9 -5
- package/lib/actions/translate.d.ts +2 -1
- package/lib/actions/translate.js +6 -1
- package/lib/agents/combine_audio_files_agent.js +4 -0
- package/lib/agents/image_google_agent.d.ts +4 -1
- package/lib/agents/image_google_agent.js +3 -2
- package/lib/agents/image_openai_agent.d.ts +5 -3
- package/lib/agents/image_openai_agent.js +29 -4
- package/lib/agents/movie_google_agent.d.ts +9 -2
- package/lib/agents/movie_google_agent.js +24 -16
- package/lib/index.d.ts +5 -0
- package/lib/index.js +5 -0
- package/lib/methods/mulmo_script.d.ts +0 -1
- package/lib/methods/mulmo_script.js +0 -5
- package/lib/types/index.d.ts +1 -0
- package/lib/types/index.js +1 -0
- package/lib/types/schema.d.ts +21 -54
- package/lib/types/schema.js +1 -1
- package/lib/utils/file.d.ts +1 -0
- package/lib/utils/file.js +12 -8
- package/lib/utils/image_plugins/image.d.ts +1 -1
- package/lib/utils/image_plugins/movie.d.ts +1 -1
- package/lib/utils/preprocess.d.ts +2 -3
- package/package.json +8 -8
- package/scripts/templates/shorts_template.json +52 -0
package/README.md CHANGED
````diff
@@ -90,11 +90,28 @@ Create a `.env` file in your project directory with the following API keys:
 ```bash
 OPENAI_API_KEY=your_openai_api_key
 ```
-
+
+#### (Optional) For the advanced image generation model
 ```bash
 DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
-
-
+```
+
+#### (Optional) For Google's image generation model
+```bash
+GOOGLE_PROJECT_ID=your_google_project_id
+```
+
+You may also need to take the following steps before running any commands:
+1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
+2. Login by `gcloud auth application-default login`
+
+#### (Optional) For Nijivoice's TTS model
+```bash
+NIJIVOICE_API_KEY=your_nijivoice_api_key
+```
+
+#### (Optional) to access web in mulmo tool
+```bash
 BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
 ```
 
````
package/assets/templates/shorts.json ADDED
```diff
@@ -0,0 +1,18 @@
+{
+  "title": "Short movie template",
+  "description": "Template for Youtube shorts.",
+  "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0"
+    },
+    "canvasSize": {
+      "width": 720,
+      "height": 1280
+    },
+    "imageParams": {
+      "style": "<style>Photo realistic, cinematic.</style>"
+    }
+  },
+  "scriptName": "movie_prompts_template.json"
+}
```
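The new template sets a portrait 720x1280 canvas, which is what the reworked agents below key off: orientation is now derived from `canvasSize` rather than from explicit `size` or `aspectRatio` parameters. A minimal sketch of reading that field; the file path is an assumption about where the installed package ships the template:

```typescript
import { readFileSync } from "fs";

// Path assumed relative to the consuming project; adjust to your install layout.
const template = JSON.parse(readFileSync("node_modules/mulmocast/assets/templates/shorts.json", "utf8"));
const { width, height } = template.presentationStyle.canvasSize;
console.log(width < height ? "portrait" : "landscape"); // "portrait" for 720x1280
```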
package/lib/actions/audio.d.ts CHANGED
```diff
@@ -1,3 +1,4 @@
 import "dotenv/config";
+import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const audio: (context: MulmoStudioContext) => Promise<void>;
+export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
```
package/lib/actions/audio.js CHANGED
```diff
@@ -160,7 +160,7 @@ const agentFilters = [
         nodeIds: ["tts"],
     },
 ];
-export const audio = async (context) => {
+export const audio = async (context, callbacks) => {
     try {
         MulmoStudioMethods.setSessionState(context.studio, "audio", true);
         const { studio, fileDirs, lang } = context;
@@ -187,6 +187,11 @@ export const audio = async (context) => {
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
     graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
     graph.injectValue("audioDirPath", audioDirPath);
+    if (callbacks) {
+        callbacks.forEach((callback) => {
+            graph.registerCallback(callback);
+        });
+    }
     await graph.run();
     writingMessage(audioCombinedFilePath);
 }
```
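`audio`, `images`, and `translate` all gain the same optional `callbacks` parameter, and each forwards it to `graph.registerCallback` before `graph.run()`. A minimal caller sketch: the `CallbackFunction` import and the two-argument call come straight from the diff, while the re-export from the package root and the shape of the callback payload are assumptions:

```typescript
import type { CallbackFunction } from "graphai";
import { audio } from "mulmocast"; // assumed: the new lib/index re-exports the actions

// `context` is a MulmoStudioContext prepared by the caller elsewhere.
declare const context: Parameters<typeof audio>[0];

// Hypothetical progress hook: graphai invokes registered callbacks while the
// graph runs, so a host app can surface per-node progress.
const onProgress: CallbackFunction = (event) => {
  console.log("graph event:", event);
};

await audio(context, [onProgress]);
```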
package/lib/actions/images.d.ts CHANGED
```diff
@@ -1,2 +1,3 @@
+import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const images: (context: MulmoStudioContext) => Promise<void>;
+export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
```
package/lib/actions/images.js CHANGED
```diff
@@ -25,14 +25,10 @@ const htmlStyle = (script, beat) => {
 const imagePreprocessAgent = async (namedInputs) => {
     const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
     const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
-    if (!imageParams.size) {
-        const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
-        imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
-    }
     const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
     const returnValue = {
-        aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
         imageParams,
+        movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
     };
     if (beat.image) {
         const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
@@ -42,20 +38,24 @@ const imagePreprocessAgent = async (namedInputs) => {
             const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
             const path = await plugin.process(processorParams);
             // undefined prompt indicates that image generation is not needed
-            return { path, ...returnValue };
+            return { imagePath: path, ...returnValue };
         }
         finally {
             MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
         }
     }
 }
-
+    // images for "edit_image"
     const images = (() => {
         const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
         const sources = imageNames.map((name) => imageRefs[name]);
         return sources.filter((source) => source !== undefined);
     })();
-
+    if (beat.moviePrompt && !beat.imagePrompt) {
+        return { ...returnValue, images }; // no image prompt, only movie prompt
+    }
+    const prompt = imagePrompt(beat, imageParams.style);
+    return { imagePath, prompt, ...returnValue, images };
 };
 const graph_data = {
     version: 0.5,
@@ -100,54 +100,36 @@ const graph_data = {
                 retry: 3,
                 inputs: {
                     prompt: ":preprocessor.prompt",
-
+                    images: ":preprocessor.images",
+                    file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
                     text: ":preprocessor.prompt", // only for fileCacheAgentFilter
-                    force: ":context.force",
-                    studio: ":context.studio", // for
-                    index: ":__mapIndex", // for
-                    sessionType: "image", // for
+                    force: ":context.force", // only for fileCacheAgentFilter
+                    studio: ":context.studio", // for fileCacheAgentFilter
+                    index: ":__mapIndex", // for fileCacheAgentFilter
+                    sessionType: "image", // for fileCacheAgentFilter
                     params: {
                         model: ":preprocessor.imageParams.model",
-                        size: ":preprocessor.imageParams.size",
                         moderation: ":preprocessor.imageParams.moderation",
-
-                        images: ":preprocessor.images",
+                        canvasSize: ":context.studio.script.canvasSize",
                     },
                 },
                 defaultValue: {},
             },
-        prepareMovie: {
-            agent: (namedInputs) => {
-                const { beat, imageDirPath, index, context } = namedInputs;
-                if (beat.moviePrompt) {
-                    const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
-                    return { movieFile };
-                }
-                return {};
-            },
-            inputs: {
-                result: ":imageGenerator", // to wait for imageGenerator to finish
-                imagePath: ":preprocessor.path",
-                beat: ":beat",
-                imageDirPath: ":imageDirPath",
-                index: ":__mapIndex",
-                context: ":context",
-            },
-        },
         movieGenerator: {
-            if: ":
+            if: ":preprocessor.movieFile",
             agent: "movieGoogleAgent",
             inputs: {
+                onComplete: ":imageGenerator", // to wait for imageGenerator to finish
                 prompt: ":beat.moviePrompt",
-                imagePath: ":preprocessor.
-                file: ":
+                imagePath: ":preprocessor.imagePath",
+                file: ":preprocessor.movieFile",
                 studio: ":context.studio", // for cache
                 index: ":__mapIndex", // for cache
                 sessionType: "movie", // for cache
                 params: {
                     model: ":context.studio.script.movieParams.model",
-                    aspectRatio: ":preprocessor.aspectRatio",
                     duration: ":beat.duration",
+                    canvasSize: ":context.studio.script.canvasSize",
                 },
             },
             defaultValue: {},
@@ -156,8 +138,8 @@ const graph_data = {
             agent: "copyAgent",
             inputs: {
                 onComplete: ":movieGenerator",
-                imageFile: ":preprocessor.
-                movieFile: ":
+                imageFile: ":preprocessor.imagePath",
+                movieFile: ":preprocessor.movieFile",
             },
             isResult: true,
         },
@@ -180,7 +162,7 @@ const graph_data = {
                 context: ":context",
             },
         },
-
+        writeOutput: {
             // console: { before: true },
             agent: "fileWriteAgent",
             inputs: {
@@ -191,14 +173,20 @@ const graph_data = {
     },
 };
 const googleAuth = async () => {
-
-
-
-
-
-
+    try {
+        const auth = new GoogleAuth({
+            scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+        });
+        const client = await auth.getClient();
+        const accessToken = await client.getAccessToken();
+        return accessToken.token;
+    }
+    catch (__error) {
+        GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
+        process.exit(1);
+    }
 };
-const generateImages = async (context) => {
+const generateImages = async (context, callbacks) => {
     const { studio, fileDirs } = context;
     const { outDirPath, imageDirPath } = fileDirs;
     mkdir(`${imageDirPath}/${studio.filename}`);
@@ -266,12 +254,17 @@ const generateImages = async (context) => {
     Object.keys(injections).forEach((key) => {
         graph.injectValue(key, injections[key]);
     });
+    if (callbacks) {
+        callbacks.forEach((callback) => {
+            graph.registerCallback(callback);
+        });
+    }
     await graph.run();
 };
-export const images = async (context) => {
+export const images = async (context, callbacks) => {
     try {
         MulmoStudioMethods.setSessionState(context.studio, "image", true);
-        await generateImages(context);
+        await generateImages(context, callbacks);
     }
     finally {
         MulmoStudioMethods.setSessionState(context.studio, "image", false);
```
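The net effect of this refactor: the `prepareMovie` node is gone, and `imagePreprocessAgent` itself now decides the movie output path and whether an image prompt exists, so `movieGenerator` can be gated on a single truthy input. A condensed sketch of that decision; the field names mirror the diff, while the simplified types and paths are assumptions (the real code also threads `imageParams`, reference images, and a filename suffix):

```typescript
type Beat = { imagePrompt?: string; moviePrompt?: string };

const plan = (beat: Beat, dir: string, index: number) => {
  // movieFile is set iff the beat has a movie prompt; movieGenerator is
  // gated on `if: ":preprocessor.movieFile"`, so this one field decides it.
  const movieFile = beat.moviePrompt ? `${dir}/${index}.mov` : undefined;
  if (beat.moviePrompt && !beat.imagePrompt) {
    return { movieFile }; // no image prompt: image generation is skipped entirely
  }
  // Otherwise the image at imagePath is generated first and seeds the movie.
  return { movieFile, imagePath: `${dir}/${index}.png`, prompt: beat.imagePrompt };
};
```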
package/lib/actions/movie.js CHANGED
```diff
@@ -61,8 +61,8 @@ const getOutputOption = (audioId) => {
 const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
     const start = performance.now();
     const ffmpegContext = FfmpegContextInit();
-    if (studio.beats.some((beat) => !beat.imageFile)) {
-        GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
+    if (studio.beats.some((beat) => !beat.imageFile && !beat.movieFile)) {
+        GraphAILogger.info("beat.imageFile or beat.movieFile is not set. Please run `yarn run images ${file}` ");
         return;
     }
     const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
@@ -71,10 +71,14 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
     const filterComplexAudioIds = [];
     studio.beats.reduce((timestamp, studioBeat, index) => {
         const beat = studio.script.beats[index];
-
-
+        const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
+        if (!sourceFile) {
+            throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
         }
-
+        if (!studioBeat.duration) {
+            throw new Error(`studioBeat.duration is not set: index=${index}`);
+        }
+        const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
         const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
         const extraPadding = (() => {
             // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
```
package/lib/actions/translate.d.ts CHANGED
```diff
@@ -1,3 +1,4 @@
 import "dotenv/config";
+import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const translate: (context: MulmoStudioContext) => Promise<void>;
+export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
```
package/lib/actions/translate.js CHANGED
```diff
@@ -208,7 +208,7 @@ const agentFilters = [
 ];
 const defaultLang = "en";
 const targetLangs = ["ja", "en"];
-export const translate = async (context) => {
+export const translate = async (context, callbacks) => {
     try {
         MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
         const { studio, fileDirs } = context;
@@ -222,6 +222,11 @@ export const translate = async (context) => {
     graph.injectValue("targetLangs", targetLangs);
     graph.injectValue("outDirPath", outDirPath);
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+    if (callbacks) {
+        callbacks.forEach((callback) => {
+            graph.registerCallback(callback);
+        });
+    }
     const results = await graph.run();
     writingMessage(outputStudioFilePath);
     if (results.mergeStudioResult) {
```
package/lib/agents/combine_audio_files_agent.js CHANGED
```diff
@@ -26,11 +26,15 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         const totalPadding = await (async () => {
             if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
                 const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+                // NOTE: We respect the duration of the movie, only if the movie is specified as a madia source, NOT generated.
                 const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
                 if (movieDuration > audioDuration) {
                     return padding + (movieDuration - audioDuration);
                 }
             }
+            else if (beat.duration && beat.duration > audioDuration) {
+                return padding + (beat.duration - audioDuration);
+            }
             return padding;
         })();
         studioBeat.duration = audioDuration + totalPadding;
```
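In other words, a beat's scripted `duration` can now stretch the audio padding the same way a referenced movie's measured duration already did, keeping the audio track at least as long as the movie. A worked example with illustrative numbers; the base `padding` value is an assumption:

```typescript
const padding = 0.3;      // base inter-beat padding in seconds (assumed value)
const audioDuration = 3;  // measured TTS duration in seconds
const beatDuration = 8;   // beat.duration from the script

// Mirrors the new else-if branch: extend the padding by the shortfall.
const totalPadding =
  beatDuration > audioDuration ? padding + (beatDuration - audioDuration) : padding;

console.log(audioDuration + totalPadding); // 8.3 => becomes studioBeat.duration
```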
package/lib/agents/image_google_agent.js CHANGED
```diff
@@ -1,4 +1,5 @@
 import { GraphAILogger } from "graphai";
+import { getAspectRatio } from "./movie_google_agent.js";
 async function generateImage(projectId, model, token, prompt, aspectRatio) {
     const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
     try {
@@ -50,9 +51,9 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
         throw error;
     }
 }
-export const imageGoogleAgent = async ({ namedInputs, params, config
+export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
     const { prompt } = namedInputs;
-    const aspectRatio = params.
+    const aspectRatio = getAspectRatio(params.canvasSize);
     const model = params.model ?? "imagen-3.0-fast-generate-001";
     //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
     const projectId = config?.projectId;
```
package/lib/agents/image_openai_agent.d.ts CHANGED
```diff
@@ -1,16 +1,18 @@
 import { AgentFunction, AgentFunctionInfo } from "graphai";
-type OpenAIImageSize = "1792x1024" | "auto" | "1024x1024" | "1536x1024" | "1024x1536" | "256x256";
 type OpenAIModeration = "low" | "auto";
 export declare const imageOpenaiAgent: AgentFunction<{
     apiKey: string;
     model: string;
-    size: OpenAIImageSize | null | undefined;
     moderation: OpenAIModeration | null | undefined;
-
+    canvasSize: {
+        width: number;
+        height: number;
+    };
 }, {
     buffer: Buffer;
 }, {
     prompt: string;
+    images: string[] | null | undefined;
 }>;
 declare const imageOpenaiAgentInfo: AgentFunctionInfo;
 export default imageOpenaiAgentInfo;
```
package/lib/agents/image_openai_agent.js CHANGED
```diff
@@ -2,14 +2,39 @@ import fs from "fs";
 import OpenAI, { toFile } from "openai";
 // https://platform.openai.com/docs/guides/image-generation
 export const imageOpenaiAgent = async ({ namedInputs, params }) => {
-    const { prompt } = namedInputs;
-    const { apiKey,
+    const { prompt, images } = namedInputs;
+    const { apiKey, moderation, canvasSize } = params;
+    const model = params.model ?? "dall-e-3";
     const openai = new OpenAI({ apiKey });
+    const size = (() => {
+        if (model === "gpt-image-1") {
+            if (canvasSize.width > canvasSize.height) {
+                return "1536x1024";
+            }
+            else if (canvasSize.width < canvasSize.height) {
+                return "1024x1536";
+            }
+            else {
+                return "1024x1024";
+            }
+        }
+        else {
+            if (canvasSize.width > canvasSize.height) {
+                return "1792x1024";
+            }
+            else if (canvasSize.width < canvasSize.height) {
+                return "1024x1792";
+            }
+            else {
+                return "1024x1024";
+            }
+        }
+    })();
     const imageOptions = {
-        model
+        model,
         prompt,
         n: 1,
-        size
+        size,
     };
     if (model === "gpt-image-1") {
         imageOptions.moderation = moderation || "auto";
```
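The removed `size` parameter is replaced by this canvas-driven lookup: the agent picks the fixed OpenAI resolution whose orientation matches the script's canvas. Restated as a standalone function for clarity (a sketch; the mapping values are exactly those in the diff):

```typescript
type CanvasSize = { width: number; height: number };

// gpt-image-1 and the dall-e-3 default support different fixed resolutions,
// so only the orientation of the canvas is honored, not its exact pixels.
const openaiImageSize = (model: string, canvas: CanvasSize): string => {
  const landscape = canvas.width > canvas.height;
  const portrait = canvas.width < canvas.height;
  if (model === "gpt-image-1") {
    return landscape ? "1536x1024" : portrait ? "1024x1536" : "1024x1024";
  }
  return landscape ? "1792x1024" : portrait ? "1024x1792" : "1024x1024";
};

// The 720x1280 shorts template maps to a portrait resolution:
console.log(openaiImageSize("gpt-image-1", { width: 720, height: 1280 })); // "1024x1536"
```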
package/lib/agents/movie_google_agent.d.ts CHANGED
```diff
@@ -3,15 +3,22 @@ export type MovieGoogleConfig = {
     projectId?: string;
     token?: string;
 };
+export declare const getAspectRatio: (canvasSize: {
+    width: number;
+    height: number;
+}) => string;
 export declare const movieGoogleAgent: AgentFunction<{
     model: string;
-
+    canvasSize: {
+        width: number;
+        height: number;
+    };
     duration?: number;
 }, {
     buffer: Buffer;
 }, {
     prompt: string;
-    imagePath
+    imagePath?: string;
 }, MovieGoogleConfig>;
 declare const movieGoogleAgentInfo: AgentFunctionInfo;
 export default movieGoogleAgentInfo;
```
|
@@ -2,26 +2,29 @@ import { readFileSync } from "fs";
|
|
|
2
2
|
import { GraphAILogger, sleep } from "graphai";
|
|
3
3
|
async function generateMovie(projectId, model, token, prompt, imagePath, aspectRatio, duration) {
|
|
4
4
|
const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}`;
|
|
5
|
-
// Prepare the payload for the API request
|
|
6
|
-
const buffer = readFileSync(imagePath);
|
|
7
|
-
const bytesBase64Encoded = buffer.toString("base64");
|
|
8
5
|
const payload = {
|
|
9
6
|
instances: [
|
|
10
7
|
{
|
|
11
8
|
prompt: prompt,
|
|
12
|
-
image:
|
|
13
|
-
bytesBase64Encoded,
|
|
14
|
-
mimeType: "image/png",
|
|
15
|
-
},
|
|
9
|
+
image: undefined,
|
|
16
10
|
},
|
|
17
11
|
],
|
|
18
12
|
parameters: {
|
|
19
13
|
sampleCount: 1,
|
|
20
14
|
aspectRatio: aspectRatio,
|
|
21
|
-
|
|
15
|
+
safetySetting: "block_only_high",
|
|
16
|
+
personGeneration: "allow_all",
|
|
22
17
|
durationSeconds: duration,
|
|
23
18
|
},
|
|
24
19
|
};
|
|
20
|
+
if (imagePath) {
|
|
21
|
+
const buffer = readFileSync(imagePath);
|
|
22
|
+
const bytesBase64Encoded = buffer.toString("base64");
|
|
23
|
+
payload.instances[0].image = {
|
|
24
|
+
bytesBase64Encoded,
|
|
25
|
+
mimeType: "image/png",
|
|
26
|
+
};
|
|
27
|
+
}
|
|
25
28
|
// Make the API call using fetch
|
|
26
29
|
const response = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:predictLongRunning`, {
|
|
27
30
|
method: "POST",
|
|
@@ -32,6 +35,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
32
35
|
body: JSON.stringify(payload),
|
|
33
36
|
});
|
|
34
37
|
if (!response.ok) {
|
|
38
|
+
GraphAILogger.info("create project on google cloud console and setup the project. More details see readme.");
|
|
35
39
|
throw new Error(`Error: ${response.status} - ${response.statusText}`);
|
|
36
40
|
}
|
|
37
41
|
const initialResponse = await response.json();
|
|
@@ -72,18 +76,22 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
|
|
|
72
76
|
}
|
|
73
77
|
return undefined;
|
|
74
78
|
}
|
|
79
|
+
export const getAspectRatio = (canvasSize) => {
|
|
80
|
+
if (canvasSize.width > canvasSize.height) {
|
|
81
|
+
return "16:9";
|
|
82
|
+
}
|
|
83
|
+
else if (canvasSize.width < canvasSize.height) {
|
|
84
|
+
return "9:16";
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
return "1:1";
|
|
88
|
+
}
|
|
89
|
+
};
|
|
75
90
|
export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
76
91
|
const { prompt, imagePath } = namedInputs;
|
|
77
|
-
|
|
78
|
-
if (prompt) {
|
|
79
|
-
const buffer = Buffer.from(prompt);
|
|
80
|
-
return { buffer };
|
|
81
|
-
}
|
|
82
|
-
*/
|
|
83
|
-
const aspectRatio = params.aspectRatio ?? "16:9";
|
|
92
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
84
93
|
const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
|
|
85
94
|
const duration = params.duration ?? 8;
|
|
86
|
-
//const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
|
|
87
95
|
const projectId = config?.projectId;
|
|
88
96
|
const token = config?.token;
|
|
89
97
|
try {
|
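`getAspectRatio` replaces both the removed `MulmoScriptMethods.getAspectRatio` (see `mulmo_script.js` below) and the per-agent `aspectRatio` params, and it adds a square case the old helper lacked. Expected outputs, as a sketch that assumes the module is importable at its published path:

```typescript
import { getAspectRatio } from "mulmocast/lib/agents/movie_google_agent.js"; // path assumed

console.log(getAspectRatio({ width: 1280, height: 720 }));  // "16:9"
console.log(getAspectRatio({ width: 720, height: 1280 }));  // "9:16" (shorts template)
console.log(getAspectRatio({ width: 1024, height: 1024 })); // "1:1"  (new square case)
```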
package/lib/index.d.ts ADDED
package/lib/index.js ADDED
package/lib/methods/mulmo_script.d.ts CHANGED
```diff
@@ -2,7 +2,6 @@ import "dotenv/config";
 import { MulmoCanvasDimension, MulmoScript, MulmoBeat, SpeechOptions, Text2SpeechProvider, Text2ImageAgentInfo, BeatMediaType } from "../types/index.js";
 export declare const MulmoScriptMethods: {
     getCanvasSize(script: MulmoScript): MulmoCanvasDimension;
-    getAspectRatio(script: MulmoScript): string;
     getSpeechProvider(script: MulmoScript): Text2SpeechProvider;
     getTextSlideStyle(script: MulmoScript, beat: MulmoBeat): string;
     getSpeechOptions(script: MulmoScript, beat: MulmoBeat): SpeechOptions | undefined;
```
package/lib/methods/mulmo_script.js CHANGED
```diff
@@ -18,11 +18,6 @@ export const MulmoScriptMethods = {
     getCanvasSize(script) {
         return mulmoCanvasDimensionSchema.parse(script.canvasSize);
     },
-    getAspectRatio(script) {
-        // Google's text2image specific parameter
-        const size = this.getCanvasSize(script);
-        return size.width > size.height ? "16:9" : "9:16";
-    },
     getSpeechProvider(script) {
         return text2SpeechProviderSchema.parse(script.speechParams?.provider);
     },
```
package/lib/types/index.d.ts CHANGED
package/lib/types/index.js CHANGED