mulmocast 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -3
- package/assets/templates/akira_comic.json +2 -2
- package/assets/templates/drslump_comic.json +2 -2
- package/assets/templates/ghibli_comic.json +2 -2
- package/assets/templates/ghost_comic.json +2 -2
- package/assets/templates/onepiece_comic.json +2 -2
- package/assets/templates/portrait_movie.json +28 -0
- package/assets/templates/realistic_movie.json +28 -0
- package/assets/templates/shorts.json +18 -0
- package/lib/actions/audio.d.ts +2 -1
- package/lib/actions/audio.js +8 -3
- package/lib/actions/captions.js +2 -2
- package/lib/actions/images.d.ts +2 -1
- package/lib/actions/images.js +68 -32
- package/lib/actions/movie.js +10 -6
- package/lib/actions/translate.d.ts +2 -1
- package/lib/actions/translate.js +8 -3
- package/lib/agents/combine_audio_files_agent.js +4 -0
- package/lib/agents/image_google_agent.d.ts +4 -1
- package/lib/agents/image_google_agent.js +3 -2
- package/lib/agents/image_openai_agent.d.ts +5 -3
- package/lib/agents/image_openai_agent.js +29 -4
- package/lib/agents/movie_google_agent.d.ts +24 -0
- package/lib/agents/movie_google_agent.js +122 -0
- package/lib/cli/bin.js +12 -0
- package/lib/index.d.ts +5 -0
- package/lib/index.js +5 -0
- package/lib/methods/mulmo_script.d.ts +0 -1
- package/lib/methods/mulmo_script.js +0 -5
- package/lib/methods/mulmo_studio.d.ts +1 -1
- package/lib/tools/create_mulmo_script_from_url.js +2 -2
- package/lib/tools/create_mulmo_script_interactively.js +2 -2
- package/lib/tools/story_to_script.js +2 -2
- package/lib/types/index.d.ts +1 -0
- package/lib/types/index.js +1 -0
- package/lib/types/schema.d.ts +155 -54
- package/lib/types/schema.js +14 -2
- package/lib/types/type.d.ts +3 -1
- package/lib/utils/file.d.ts +1 -0
- package/lib/utils/file.js +12 -8
- package/lib/utils/image_plugins/image.d.ts +1 -1
- package/lib/utils/image_plugins/movie.d.ts +1 -1
- package/lib/utils/preprocess.d.ts +9 -3
- package/lib/utils/utils.d.ts +1 -0
- package/lib/utils/utils.js +3 -0
- package/package.json +8 -8
- package/scripts/templates/movie_prompts_template.json +50 -0
- package/scripts/templates/shorts_template.json +52 -0
package/README.md
CHANGED
|
@@ -90,11 +90,28 @@ Create a `.env` file in your project directory with the following API keys:
|
|
|
90
90
|
```bash
|
|
91
91
|
OPENAI_API_KEY=your_openai_api_key
|
|
92
92
|
```
|
|
93
|
-
|
|
93
|
+
|
|
94
|
+
#### (Optional) For the advanced image generation model
|
|
94
95
|
```bash
|
|
95
96
|
DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
|
|
96
|
-
|
|
97
|
-
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
#### (Optional) For Google's image generation model
|
|
100
|
+
```bash
|
|
101
|
+
GOOGLE_PROJECT_ID=your_google_project_id
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
You may also need to take the following steps before running any commands:
|
|
105
|
+
1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
|
|
106
|
+
2. Log in by running `gcloud auth application-default login`
|
|
107
|
+
|
|
108
|
+
#### (Optional) For Nijivoice's TTS model
|
|
109
|
+
```bash
|
|
110
|
+
NIJIVOICE_API_KEY=your_nijivoice_api_key
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
#### (Optional) To access the web in the mulmo tool
|
|
114
|
+
```bash
|
|
98
115
|
BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
|
|
99
116
|
```
|
|
100
117
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "
|
|
3
|
-
"description": "Template for
|
|
2
|
+
"title": "Akira style",
|
|
3
|
+
"description": "Template for Akira style comic presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "Dr. Slump Style
|
|
3
|
-
"description": "Template for Dr. Slump
|
|
2
|
+
"title": "Dr. Slump Style",
|
|
3
|
+
"description": "Template for Dr. Slump style comic presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "
|
|
3
|
-
"description": "Template for
|
|
2
|
+
"title": "Ghibli comic style",
|
|
3
|
+
"description": "Template for Ghibli-style comic presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "
|
|
3
|
-
"description": "Template for
|
|
2
|
+
"title": "Ghost in the shell style",
|
|
3
|
+
"description": "Template for Ghost in the shell style comic presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "
|
|
3
|
-
"description": "Template for
|
|
2
|
+
"title": "One Piece style",
|
|
3
|
+
"description": "Template for One Piece style comic presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Photo realistic movie (portrait)",
|
|
3
|
+
"description": "Template for photo realistic movie in portrait mode.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1024,
|
|
12
|
+
"height": 1536
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Photo realistic, cinematic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "movie_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Photo realistic movie template",
|
|
3
|
+
"description": "Template for photo realistic movie.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Photo realistic, cinematic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "movie_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Short movie template",
|
|
3
|
+
"description": "Template for Youtube shorts.",
|
|
4
|
+
"systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0"
|
|
8
|
+
},
|
|
9
|
+
"canvasSize": {
|
|
10
|
+
"width": 720,
|
|
11
|
+
"height": 1280
|
|
12
|
+
},
|
|
13
|
+
"imageParams": {
|
|
14
|
+
"style": "<style>Photo realistic, cinematic.</style>"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"scriptName": "movie_prompts_template.json"
|
|
18
|
+
}
|
package/lib/actions/audio.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
+
import type { CallbackFunction } from "graphai";
|
|
2
3
|
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
-
export declare const audio: (context: MulmoStudioContext) => Promise<void>;
|
|
4
|
+
export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
package/lib/actions/audio.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI } from "graphai";
|
|
3
|
-
import
|
|
3
|
+
import * as agents from "@graphai/vanilla";
|
|
4
4
|
import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
5
5
|
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
6
6
|
import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
|
|
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
|
12
12
|
import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
|
|
13
13
|
import { text2hash, localizedText } from "../utils/utils.js";
|
|
14
14
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
15
|
-
|
|
15
|
+
const vanillaAgents = agents.default ?? agents;
|
|
16
16
|
// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
|
|
17
17
|
// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
|
|
18
18
|
const provider_to_agent = {
|
|
@@ -160,7 +160,7 @@ const agentFilters = [
|
|
|
160
160
|
nodeIds: ["tts"],
|
|
161
161
|
},
|
|
162
162
|
];
|
|
163
|
-
export const audio = async (context) => {
|
|
163
|
+
export const audio = async (context, callbacks) => {
|
|
164
164
|
try {
|
|
165
165
|
MulmoStudioMethods.setSessionState(context.studio, "audio", true);
|
|
166
166
|
const { studio, fileDirs, lang } = context;
|
|
@@ -187,6 +187,11 @@ export const audio = async (context) => {
|
|
|
187
187
|
graph.injectValue("outputStudioFilePath", outputStudioFilePath);
|
|
188
188
|
graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
|
|
189
189
|
graph.injectValue("audioDirPath", audioDirPath);
|
|
190
|
+
if (callbacks) {
|
|
191
|
+
callbacks.forEach((callback) => {
|
|
192
|
+
graph.registerCallback(callback);
|
|
193
|
+
});
|
|
194
|
+
}
|
|
190
195
|
await graph.run();
|
|
191
196
|
writingMessage(audioCombinedFilePath);
|
|
192
197
|
}
|
package/lib/actions/captions.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
|
-
import
|
|
2
|
+
import * as agents from "@graphai/vanilla";
|
|
3
3
|
import { getHTMLFile } from "../utils/file.js";
|
|
4
4
|
import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
|
|
5
5
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
6
|
-
|
|
6
|
+
const vanillaAgents = agents.default ?? agents;
|
|
7
7
|
const graph_data = {
|
|
8
8
|
version: 0.5,
|
|
9
9
|
nodes: {
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
+
import type { CallbackFunction } from "graphai";
|
|
1
2
|
import { MulmoStudioContext } from "../types/index.js";
|
|
2
|
-
export declare const images: (context: MulmoStudioContext) => Promise<void>;
|
|
3
|
+
export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
package/lib/actions/images.js
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
4
|
-
import
|
|
4
|
+
import * as agents from "@graphai/vanilla";
|
|
5
5
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
6
6
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
7
7
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
8
8
|
import imageGoogleAgent from "../agents/image_google_agent.js";
|
|
9
9
|
import imageOpenaiAgent from "../agents/image_openai_agent.js";
|
|
10
|
+
import movieGoogleAgent from "../agents/movie_google_agent.js";
|
|
10
11
|
import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
12
|
import { imagePlugins } from "../utils/image_plugins/index.js";
|
|
12
13
|
import { imagePrompt } from "../utils/prompt.js";
|
|
13
|
-
|
|
14
|
+
const vanillaAgents = agents.default ?? agents;
|
|
14
15
|
dotenv.config();
|
|
15
16
|
// const openai = new OpenAI();
|
|
16
17
|
import { GoogleAuth } from "google-auth-library";
|
|
@@ -26,8 +27,8 @@ const imagePreprocessAgent = async (namedInputs) => {
|
|
|
26
27
|
const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
|
|
27
28
|
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
|
|
28
29
|
const returnValue = {
|
|
29
|
-
aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
|
|
30
30
|
imageParams,
|
|
31
|
+
movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
|
|
31
32
|
};
|
|
32
33
|
if (beat.image) {
|
|
33
34
|
const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
|
|
@@ -37,20 +38,24 @@ const imagePreprocessAgent = async (namedInputs) => {
|
|
|
37
38
|
const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
|
|
38
39
|
const path = await plugin.process(processorParams);
|
|
39
40
|
// undefined prompt indicates that image generation is not needed
|
|
40
|
-
return { path, ...returnValue };
|
|
41
|
+
return { imagePath: path, ...returnValue };
|
|
41
42
|
}
|
|
42
43
|
finally {
|
|
43
44
|
MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
|
|
44
45
|
}
|
|
45
46
|
}
|
|
46
47
|
}
|
|
47
|
-
|
|
48
|
+
// images for "edit_image"
|
|
48
49
|
const images = (() => {
|
|
49
50
|
const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
|
|
50
51
|
const sources = imageNames.map((name) => imageRefs[name]);
|
|
51
52
|
return sources.filter((source) => source !== undefined);
|
|
52
53
|
})();
|
|
53
|
-
|
|
54
|
+
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
55
|
+
return { ...returnValue, images }; // no image prompt, only movie prompt
|
|
56
|
+
}
|
|
57
|
+
const prompt = imagePrompt(beat, imageParams.style);
|
|
58
|
+
return { imagePath, prompt, ...returnValue, images };
|
|
54
59
|
};
|
|
55
60
|
const graph_data = {
|
|
56
61
|
version: 0.5,
|
|
@@ -95,18 +100,36 @@ const graph_data = {
|
|
|
95
100
|
retry: 3,
|
|
96
101
|
inputs: {
|
|
97
102
|
prompt: ":preprocessor.prompt",
|
|
98
|
-
|
|
103
|
+
images: ":preprocessor.images",
|
|
104
|
+
file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
|
|
99
105
|
text: ":preprocessor.prompt", // only for fileCacheAgentFilter
|
|
100
|
-
force: ":context.force",
|
|
101
|
-
studio: ":context.studio", // for
|
|
102
|
-
index: ":__mapIndex", // for
|
|
103
|
-
sessionType: "image", // for
|
|
106
|
+
force: ":context.force", // only for fileCacheAgentFilter
|
|
107
|
+
studio: ":context.studio", // for fileCacheAgentFilter
|
|
108
|
+
index: ":__mapIndex", // for fileCacheAgentFilter
|
|
109
|
+
sessionType: "image", // for fileCacheAgentFilter
|
|
104
110
|
params: {
|
|
105
111
|
model: ":preprocessor.imageParams.model",
|
|
106
|
-
size: ":preprocessor.imageParams.size",
|
|
107
112
|
moderation: ":preprocessor.imageParams.moderation",
|
|
108
|
-
|
|
109
|
-
|
|
113
|
+
canvasSize: ":context.studio.script.canvasSize",
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
defaultValue: {},
|
|
117
|
+
},
|
|
118
|
+
movieGenerator: {
|
|
119
|
+
if: ":preprocessor.movieFile",
|
|
120
|
+
agent: "movieGoogleAgent",
|
|
121
|
+
inputs: {
|
|
122
|
+
onComplete: ":imageGenerator", // to wait for imageGenerator to finish
|
|
123
|
+
prompt: ":beat.moviePrompt",
|
|
124
|
+
imagePath: ":preprocessor.imagePath",
|
|
125
|
+
file: ":preprocessor.movieFile",
|
|
126
|
+
studio: ":context.studio", // for cache
|
|
127
|
+
index: ":__mapIndex", // for cache
|
|
128
|
+
sessionType: "movie", // for cache
|
|
129
|
+
params: {
|
|
130
|
+
model: ":context.studio.script.movieParams.model",
|
|
131
|
+
duration: ":beat.duration",
|
|
132
|
+
canvasSize: ":context.studio.script.canvasSize",
|
|
110
133
|
},
|
|
111
134
|
},
|
|
112
135
|
defaultValue: {},
|
|
@@ -114,11 +137,9 @@ const graph_data = {
|
|
|
114
137
|
output: {
|
|
115
138
|
agent: "copyAgent",
|
|
116
139
|
inputs: {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
output: {
|
|
121
|
-
imageFile: ".image",
|
|
140
|
+
onComplete: ":movieGenerator",
|
|
141
|
+
imageFile: ":preprocessor.imagePath",
|
|
142
|
+
movieFile: ":preprocessor.movieFile",
|
|
122
143
|
},
|
|
123
144
|
isResult: true,
|
|
124
145
|
},
|
|
@@ -141,7 +162,7 @@ const graph_data = {
|
|
|
141
162
|
context: ":context",
|
|
142
163
|
},
|
|
143
164
|
},
|
|
144
|
-
|
|
165
|
+
writeOutput: {
|
|
145
166
|
// console: { before: true },
|
|
146
167
|
agent: "fileWriteAgent",
|
|
147
168
|
inputs: {
|
|
@@ -152,14 +173,20 @@ const graph_data = {
|
|
|
152
173
|
},
|
|
153
174
|
};
|
|
154
175
|
const googleAuth = async () => {
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
176
|
+
try {
|
|
177
|
+
const auth = new GoogleAuth({
|
|
178
|
+
scopes: ["https://www.googleapis.com/auth/cloud-platform"],
|
|
179
|
+
});
|
|
180
|
+
const client = await auth.getClient();
|
|
181
|
+
const accessToken = await client.getAccessToken();
|
|
182
|
+
return accessToken.token;
|
|
183
|
+
}
|
|
184
|
+
catch (__error) {
|
|
185
|
+
GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
|
|
186
|
+
process.exit(1);
|
|
187
|
+
}
|
|
161
188
|
};
|
|
162
|
-
const generateImages = async (context) => {
|
|
189
|
+
const generateImages = async (context, callbacks) => {
|
|
163
190
|
const { studio, fileDirs } = context;
|
|
164
191
|
const { outDirPath, imageDirPath } = fileDirs;
|
|
165
192
|
mkdir(`${imageDirPath}/${studio.filename}`);
|
|
@@ -167,7 +194,7 @@ const generateImages = async (context) => {
|
|
|
167
194
|
{
|
|
168
195
|
name: "fileCacheAgentFilter",
|
|
169
196
|
agent: fileCacheAgentFilter,
|
|
170
|
-
nodeIds: ["imageGenerator"],
|
|
197
|
+
nodeIds: ["imageGenerator", "movieGenerator"],
|
|
171
198
|
},
|
|
172
199
|
];
|
|
173
200
|
const options = {
|
|
@@ -175,7 +202,7 @@ const generateImages = async (context) => {
|
|
|
175
202
|
};
|
|
176
203
|
const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
|
|
177
204
|
// We need to get Google's auth token only if Google is the text2image or movie provider.
|
|
178
|
-
if (imageAgentInfo.provider === "google") {
|
|
205
|
+
if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
|
|
179
206
|
GraphAILogger.log("google was specified as text2image engine");
|
|
180
207
|
const token = await googleAuth();
|
|
181
208
|
options.config = {
|
|
@@ -183,6 +210,10 @@ const generateImages = async (context) => {
|
|
|
183
210
|
projectId: process.env.GOOGLE_PROJECT_ID,
|
|
184
211
|
token,
|
|
185
212
|
},
|
|
213
|
+
movieGoogleAgent: {
|
|
214
|
+
projectId: process.env.GOOGLE_PROJECT_ID,
|
|
215
|
+
token,
|
|
216
|
+
},
|
|
186
217
|
};
|
|
187
218
|
}
|
|
188
219
|
if (imageAgentInfo.provider === "openai") {
|
|
@@ -219,16 +250,21 @@ const generateImages = async (context) => {
|
|
|
219
250
|
imageDirPath,
|
|
220
251
|
imageRefs,
|
|
221
252
|
};
|
|
222
|
-
const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
|
|
253
|
+
const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
|
|
223
254
|
Object.keys(injections).forEach((key) => {
|
|
224
255
|
graph.injectValue(key, injections[key]);
|
|
225
256
|
});
|
|
257
|
+
if (callbacks) {
|
|
258
|
+
callbacks.forEach((callback) => {
|
|
259
|
+
graph.registerCallback(callback);
|
|
260
|
+
});
|
|
261
|
+
}
|
|
226
262
|
await graph.run();
|
|
227
263
|
};
|
|
228
|
-
export const images = async (context) => {
|
|
264
|
+
export const images = async (context, callbacks) => {
|
|
229
265
|
try {
|
|
230
266
|
MulmoStudioMethods.setSessionState(context.studio, "image", true);
|
|
231
|
-
await generateImages(context);
|
|
267
|
+
await generateImages(context, callbacks);
|
|
232
268
|
}
|
|
233
269
|
finally {
|
|
234
270
|
MulmoStudioMethods.setSessionState(context.studio, "image", false);
|
package/lib/actions/movie.js
CHANGED
|
@@ -61,8 +61,8 @@ const getOutputOption = (audioId) => {
|
|
|
61
61
|
const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
|
|
62
62
|
const start = performance.now();
|
|
63
63
|
const ffmpegContext = FfmpegContextInit();
|
|
64
|
-
if (studio.beats.some((beat) => !beat.imageFile)) {
|
|
65
|
-
GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
|
|
64
|
+
if (studio.beats.some((beat) => !beat.imageFile && !beat.movieFile)) {
|
|
65
|
+
GraphAILogger.info("beat.imageFile or beat.movieFile is not set. Please run `yarn run images ${file}` ");
|
|
66
66
|
return;
|
|
67
67
|
}
|
|
68
68
|
const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
|
|
@@ -71,11 +71,15 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
71
71
|
const filterComplexAudioIds = [];
|
|
72
72
|
studio.beats.reduce((timestamp, studioBeat, index) => {
|
|
73
73
|
const beat = studio.script.beats[index];
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
|
|
75
|
+
if (!sourceFile) {
|
|
76
|
+
throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
|
|
76
77
|
}
|
|
77
|
-
|
|
78
|
-
|
|
78
|
+
if (!studioBeat.duration) {
|
|
79
|
+
throw new Error(`studioBeat.duration is not set: index=${index}`);
|
|
80
|
+
}
|
|
81
|
+
const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
|
|
82
|
+
const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
|
|
79
83
|
const extraPadding = (() => {
|
|
80
84
|
// We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
|
|
81
85
|
if (index === 0) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
+
import type { CallbackFunction } from "graphai";
|
|
2
3
|
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
-
export declare const translate: (context: MulmoStudioContext) => Promise<void>;
|
|
4
|
+
export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
package/lib/actions/translate.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI, assert } from "graphai";
|
|
3
|
-
import
|
|
3
|
+
import * as agents from "@graphai/vanilla";
|
|
4
4
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
5
5
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
6
6
|
import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
|
|
7
7
|
import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
|
|
8
8
|
import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
|
|
9
9
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
10
|
-
|
|
10
|
+
const vanillaAgents = agents.default ?? agents;
|
|
11
11
|
const translateGraph = {
|
|
12
12
|
version: 0.5,
|
|
13
13
|
nodes: {
|
|
@@ -208,7 +208,7 @@ const agentFilters = [
|
|
|
208
208
|
];
|
|
209
209
|
const defaultLang = "en";
|
|
210
210
|
const targetLangs = ["ja", "en"];
|
|
211
|
-
export const translate = async (context) => {
|
|
211
|
+
export const translate = async (context, callbacks) => {
|
|
212
212
|
try {
|
|
213
213
|
MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
|
|
214
214
|
const { studio, fileDirs } = context;
|
|
@@ -222,6 +222,11 @@ export const translate = async (context) => {
|
|
|
222
222
|
graph.injectValue("targetLangs", targetLangs);
|
|
223
223
|
graph.injectValue("outDirPath", outDirPath);
|
|
224
224
|
graph.injectValue("outputStudioFilePath", outputStudioFilePath);
|
|
225
|
+
if (callbacks) {
|
|
226
|
+
callbacks.forEach((callback) => {
|
|
227
|
+
graph.registerCallback(callback);
|
|
228
|
+
});
|
|
229
|
+
}
|
|
225
230
|
const results = await graph.run();
|
|
226
231
|
writingMessage(outputStudioFilePath);
|
|
227
232
|
if (results.mergeStudioResult) {
|
|
@@ -26,11 +26,15 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
|
|
|
26
26
|
const totalPadding = await (async () => {
|
|
27
27
|
if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
|
|
28
28
|
const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
|
|
29
|
+
// NOTE: We respect the duration of the movie only if the movie is specified as a media source, NOT generated.
|
|
29
30
|
const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
|
|
30
31
|
if (movieDuration > audioDuration) {
|
|
31
32
|
return padding + (movieDuration - audioDuration);
|
|
32
33
|
}
|
|
33
34
|
}
|
|
35
|
+
else if (beat.duration && beat.duration > audioDuration) {
|
|
36
|
+
return padding + (beat.duration - audioDuration);
|
|
37
|
+
}
|
|
34
38
|
return padding;
|
|
35
39
|
})();
|
|
36
40
|
studioBeat.duration = audioDuration + totalPadding;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
|
+
import { getAspectRatio } from "./movie_google_agent.js";
|
|
2
3
|
async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
3
4
|
const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
|
|
4
5
|
try {
|
|
@@ -50,9 +51,9 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
50
51
|
throw error;
|
|
51
52
|
}
|
|
52
53
|
}
|
|
53
|
-
export const imageGoogleAgent = async ({ namedInputs, params, config
|
|
54
|
+
export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
|
|
54
55
|
const { prompt } = namedInputs;
|
|
55
|
-
const aspectRatio = params.
|
|
56
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
56
57
|
const model = params.model ?? "imagen-3.0-fast-generate-001";
|
|
57
58
|
//const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
|
|
58
59
|
const projectId = config?.projectId;
|
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
import { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
-
type OpenAIImageSize = "1792x1024" | "auto" | "1024x1024" | "1536x1024" | "1024x1536" | "256x256";
|
|
3
2
|
type OpenAIModeration = "low" | "auto";
|
|
4
3
|
export declare const imageOpenaiAgent: AgentFunction<{
|
|
5
4
|
apiKey: string;
|
|
6
5
|
model: string;
|
|
7
|
-
size: OpenAIImageSize | null | undefined;
|
|
8
6
|
moderation: OpenAIModeration | null | undefined;
|
|
9
|
-
|
|
7
|
+
canvasSize: {
|
|
8
|
+
width: number;
|
|
9
|
+
height: number;
|
|
10
|
+
};
|
|
10
11
|
}, {
|
|
11
12
|
buffer: Buffer;
|
|
12
13
|
}, {
|
|
13
14
|
prompt: string;
|
|
15
|
+
images: string[] | null | undefined;
|
|
14
16
|
}>;
|
|
15
17
|
declare const imageOpenaiAgentInfo: AgentFunctionInfo;
|
|
16
18
|
export default imageOpenaiAgentInfo;
|
|
@@ -2,14 +2,39 @@ import fs from "fs";
|
|
|
2
2
|
import OpenAI, { toFile } from "openai";
|
|
3
3
|
// https://platform.openai.com/docs/guides/image-generation
|
|
4
4
|
export const imageOpenaiAgent = async ({ namedInputs, params }) => {
|
|
5
|
-
const { prompt } = namedInputs;
|
|
6
|
-
const { apiKey,
|
|
5
|
+
const { prompt, images } = namedInputs;
|
|
6
|
+
const { apiKey, moderation, canvasSize } = params;
|
|
7
|
+
const model = params.model ?? "dall-e-3";
|
|
7
8
|
const openai = new OpenAI({ apiKey });
|
|
9
|
+
const size = (() => {
|
|
10
|
+
if (model === "gpt-image-1") {
|
|
11
|
+
if (canvasSize.width > canvasSize.height) {
|
|
12
|
+
return "1536x1024";
|
|
13
|
+
}
|
|
14
|
+
else if (canvasSize.width < canvasSize.height) {
|
|
15
|
+
return "1024x1536";
|
|
16
|
+
}
|
|
17
|
+
else {
|
|
18
|
+
return "1024x1024";
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
if (canvasSize.width > canvasSize.height) {
|
|
23
|
+
return "1792x1024";
|
|
24
|
+
}
|
|
25
|
+
else if (canvasSize.width < canvasSize.height) {
|
|
26
|
+
return "1024x1792";
|
|
27
|
+
}
|
|
28
|
+
else {
|
|
29
|
+
return "1024x1024";
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
})();
|
|
8
33
|
const imageOptions = {
|
|
9
|
-
model
|
|
34
|
+
model,
|
|
10
35
|
prompt,
|
|
11
36
|
n: 1,
|
|
12
|
-
size
|
|
37
|
+
size,
|
|
13
38
|
};
|
|
14
39
|
if (model === "gpt-image-1") {
|
|
15
40
|
imageOptions.moderation = moderation || "auto";
|