mulmocast 0.0.10 → 0.0.12
This diff shows the changes between publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/README.md +18 -3
- package/assets/templates/ghibli_shorts.json +34 -0
- package/assets/templates/shorts.json +18 -0
- package/assets/templates/trailer.json +25 -0
- package/lib/actions/audio.d.ts +2 -1
- package/lib/actions/audio.js +35 -17
- package/lib/actions/captions.js +5 -5
- package/lib/actions/images.d.ts +2 -1
- package/lib/actions/images.js +90 -58
- package/lib/actions/movie.js +53 -16
- package/lib/actions/pdf.js +3 -3
- package/lib/actions/translate.d.ts +2 -1
- package/lib/actions/translate.js +21 -16
- package/lib/agents/combine_audio_files_agent.js +4 -0
- package/lib/agents/image_google_agent.d.ts +4 -1
- package/lib/agents/image_google_agent.js +3 -2
- package/lib/agents/image_openai_agent.d.ts +5 -3
- package/lib/agents/image_openai_agent.js +35 -7
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/movie_google_agent.d.ts +9 -2
- package/lib/agents/movie_google_agent.js +24 -16
- package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
- package/lib/agents/tts_elevenlabs_agent.js +60 -0
- package/lib/agents/tts_google_agent.js +1 -1
- package/lib/agents/tts_nijivoice_agent.js +3 -2
- package/lib/agents/tts_openai_agent.js +1 -1
- package/lib/cli/commands/audio/handler.js +4 -1
- package/lib/cli/commands/image/handler.js +4 -1
- package/lib/cli/commands/movie/handler.js +4 -1
- package/lib/cli/commands/pdf/handler.js +4 -1
- package/lib/cli/commands/translate/handler.js +4 -1
- package/lib/cli/helpers.d.ts +3 -3
- package/lib/cli/helpers.js +38 -20
- package/lib/index.d.ts +5 -0
- package/lib/index.js +5 -0
- package/lib/methods/mulmo_media_source.d.ts +1 -0
- package/lib/methods/mulmo_media_source.js +12 -0
- package/lib/methods/mulmo_script.d.ts +1 -1
- package/lib/methods/mulmo_script.js +9 -5
- package/lib/methods/mulmo_studio_context.d.ts +5 -0
- package/lib/methods/mulmo_studio_context.js +23 -0
- package/lib/types/index.d.ts +1 -0
- package/lib/types/index.js +1 -0
- package/lib/types/schema.d.ts +1513 -290
- package/lib/types/schema.js +26 -35
- package/lib/types/type.d.ts +4 -1
- package/lib/utils/file.d.ts +5 -15
- package/lib/utils/file.js +14 -21
- package/lib/utils/filters.js +4 -4
- package/lib/utils/image_plugins/beat.d.ts +4 -0
- package/lib/utils/image_plugins/beat.js +7 -0
- package/lib/utils/image_plugins/image.d.ts +1 -1
- package/lib/utils/image_plugins/index.d.ts +2 -1
- package/lib/utils/image_plugins/index.js +2 -1
- package/lib/utils/image_plugins/movie.d.ts +1 -1
- package/lib/utils/image_plugins/source.js +2 -2
- package/lib/utils/preprocess.d.ts +26 -23
- package/lib/utils/preprocess.js +4 -0
- package/package.json +8 -8
- package/scripts/templates/movie_prompts_no_text_template.json +50 -0
- package/scripts/templates/shorts_template.json +52 -0
package/README.md
CHANGED
@@ -90,11 +90,26 @@ Create a `.env` file in your project directory with the following API keys:
 ```bash
 OPENAI_API_KEY=your_openai_api_key
 ```
-
+
+#### (Optional) For the advanced image generation model
 ```bash
 DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
-
-
+```
+
+#### (Optional) For Google's image generation model
+```bash
+GOOGLE_PROJECT_ID=your_google_project_id
+```
+
+See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
+
+#### (Optional) For Nijivoice's TTS model
+```bash
+NIJIVOICE_API_KEY=your_nijivoice_api_key
+```
+
+#### (Optional) to access web in mulmo tool
+```bash
 BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
 ```
 
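The README's `.env` section now covers one required key and four optional ones. As a quick orientation, here is a sketch of how code built against these docs might read them (the `env` object and the guard are illustrative, not part of mulmocast; the variable names come from the README above):

```typescript
import "dotenv/config"; // mulmocast loads dotenv the same way (see lib/actions/audio.js)

// Illustrative only: gathers the keys documented in the README.
const env = {
  openaiApiKey: process.env.OPENAI_API_KEY, // required
  openaiImageModel: process.env.DEFAULT_OPENAI_IMAGE_MODEL, // optional: gpt-image-1
  googleProjectId: process.env.GOOGLE_PROJECT_ID, // optional: Google image generation
  nijivoiceApiKey: process.env.NIJIVOICE_API_KEY, // optional: Nijivoice TTS
  browserlessApiToken: process.env.BROWSERLESS_API_TOKEN, // optional: web access in mulmo tool
};

if (!env.openaiApiKey) {
  throw new Error("OPENAI_API_KEY is required; see the .env setup above");
}
```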
package/assets/templates/ghibli_shorts.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "title": "Ghibli comic style",
+  "description": "Template for Ghibli-style comic presentation.",
+  "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1024,
+      "height": 1536
+    },
+    "speechParams": {
+      "provider": "nijivoice",
+      "speakers": {
+        "Presenter": { "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62", "speechOptions": { "speed": 1.5 } }
+      }
+    },
+    "imageParams": {
+      "style": "<style>Ghibli style</style>",
+      "images": {
+        "presenter": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}
package/assets/templates/shorts.json
ADDED
@@ -0,0 +1,18 @@
+{
+  "title": "Short movie template",
+  "description": "Template for Youtube shorts.",
+  "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0"
+    },
+    "canvasSize": {
+      "width": 720,
+      "height": 1280
+    },
+    "imageParams": {
+      "style": "<style>Photo realistic, cinematic.</style>"
+    }
+  },
+  "scriptName": "movie_prompts_template.json"
+}
package/assets/templates/trailer.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "title": "Movie Trailer template",
+  "description": "Template for A Movie Trailer.",
+  "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0"
+    },
+    "canvasSize": {
+      "width": 1280,
+      "height": 720
+    },
+    "imageParams": {
+      "style": "<style>Photo realistic, cinematic.</style>"
+    },
+    "audioParams": {
+      "padding": 0.0,
+      "introPadding": 0.0,
+      "closingPadding": 0.0,
+      "outroPadding": 2.5,
+      "bgm": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/bgms/trailer_dramatic.mp3" }
+    }
+  },
+  "scriptName": "movie_prompts_no_text_template.json"
+}
package/lib/actions/audio.d.ts
CHANGED
@@ -1,3 +1,4 @@
 import "dotenv/config";
+import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const audio: (context: MulmoStudioContext) => Promise<void>;
+export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
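The new optional `callbacks` parameter (also added to `images` below) is forwarded to `graph.registerCallback` before the GraphAI graph runs, which makes per-node progress observable. A minimal usage sketch; the export path and the shape of the value GraphAI passes to each `CallbackFunction` are assumptions here, so the value is only logged as-is:

```typescript
import type { CallbackFunction } from "graphai";
import { audio } from "mulmocast"; // assumed export; lib/index.js gained 5 lines in this release

// Assumption: GraphAI invokes each registered callback with node progress data.
const progress: CallbackFunction = (data) => {
  console.log("[mulmocast audio]", data);
};

// `context` is a MulmoStudioContext prepared elsewhere:
// await audio(context, [progress]);
```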
package/lib/actions/audio.js
CHANGED
@@ -6,12 +6,14 @@ import addBGMAgent from "../agents/add_bgm_agent.js";
 import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
 import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
 import ttsGoogleAgent from "../agents/tts_google_agent.js";
+import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath,
+import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
 const vanillaAgents = agents.default ?? agents;
 // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
 // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -19,10 +21,11 @@ const provider_to_agent = {
   nijivoice: "ttsNijivoiceAgent",
   openai: "ttsOpenaiAgent",
   google: "ttsGoogleAgent",
+  elevenlabs: "ttsElevenlabsAgent",
 };
 const getAudioPath = (context, beat, audioFile, audioDirPath) => {
   if (beat.audio?.type === "audio") {
-    const path =
+    const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
     if (path) {
       return path;
     }
@@ -34,18 +37,21 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
   return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
 };
 const preprocessor = (namedInputs) => {
-  const { beat, studioBeat, multiLingual,
+  const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
   const { lang } = context;
-  const
+  const speaker = context.studio.script.speechParams.speakers[beat.speaker];
+  const voiceId = speaker.voiceId;
   const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
   const text = localizedText(beat, multiLingual, lang);
-
-  const
+  // Use speaker-specific provider if available, otherwise fall back to script-level provider
+  const provider = speaker.provider ?? context.studio.script.speechParams.provider;
+  const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
+  const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
   const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
   studioBeat.audioFile = audioPath;
   const needsTTS = !beat.audio && audioPath !== undefined;
   return {
-    ttsAgent: provider_to_agent[
+    ttsAgent: provider_to_agent[provider],
     studioBeat,
     voiceId,
     speechOptions,
@@ -62,7 +68,6 @@ const graph_tts = {
       beat: ":beat",
       studioBeat: ":studioBeat",
       multiLingual: ":multiLingual",
-      index: ":__mapIndex",
       context: ":context",
       audioDirPath: ":audioDirPath",
     },
@@ -74,7 +79,7 @@
       text: ":preprocessor.text",
       file: ":preprocessor.audioPath",
       force: ":context.force",
-
+      mulmoContext: ":context", // for cache
       index: ":__mapIndex", // for cache
       sessionType: "audio", // for cache
       params: {
@@ -96,6 +101,7 @@ const graph_data = {
     outputStudioFilePath: {},
     audioDirPath: {},
     audioSegmentDirPath: {},
+    musicFile: {},
     map: {
       agent: "mapAgent",
       inputs: {
@@ -130,14 +136,14 @@
     },
     addBGM: {
       agent: "addBGMAgent",
-      params: {
-        musicFile: process.env.PATH_BGM ?? defaultBGMPath,
-      },
       inputs: {
         wait: ":combineFiles",
         voiceFile: ":audioCombinedFilePath",
         outputFile: ":audioArtifactFilePath",
         script: ":context.studio.script",
+        params: {
+          musicFile: ":musicFile",
+        },
       },
       isResult: true,
     },
@@ -160,9 +166,9 @@ const agentFilters = [
     nodeIds: ["tts"],
   },
 ];
-export const audio = async (context) => {
+export const audio = async (context, callbacks) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "audio", true);
     const { studio, fileDirs, lang } = context;
     const { outDirPath, audioDirPath } = fileDirs;
     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -171,13 +177,19 @@ export const audio = async (context) => {
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
     mkdir(outDirPath);
     mkdir(audioSegmentDirPath);
-
+    // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+    const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
+      const provider = speaker.provider ?? studio.script.speechParams.provider;
+      return provider === "nijivoice" || provider === "elevenlabs";
+    });
+    graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
     const graph = new GraphAI(graph_data, {
       ...vanillaAgents,
       fileWriteAgent,
       ttsOpenaiAgent,
       ttsNijivoiceAgent,
       ttsGoogleAgent,
+      ttsElevenlabsAgent,
       addBGMAgent,
       combineAudioFilesAgent,
     }, { agentFilters });
@@ -187,10 +199,16 @@ export const audio = async (context) => {
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
     graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
     graph.injectValue("audioDirPath", audioDirPath);
+    graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath);
+    if (callbacks) {
+      callbacks.forEach((callback) => {
+        graph.registerCallback(callback);
+      });
+    }
     await graph.run();
     writingMessage(audioCombinedFilePath);
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "audio", false);
   }
 };
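Two behavioral changes in `audio.js` are easy to miss in the diff: a speaker may now carry its own TTS `provider` (falling back to the script-level `speechParams.provider`), and the graph's concurrency drops to 1 whenever any speaker resolves to `nijivoice` or `elevenlabs`. A standalone restatement, with types simplified from what the diff shows:

```typescript
type Provider = "openai" | "google" | "nijivoice" | "elevenlabs";

// Simplified shapes; the real definitions live in lib/types/schema.d.ts.
interface Speaker { voiceId: string; provider?: Provider; }
interface SpeechParams { provider: Provider; speakers: Record<string, Speaker>; }

// Speaker-level provider wins; otherwise fall back to the script-level default.
const resolveProvider = (speaker: Speaker, params: SpeechParams): Provider =>
  speaker.provider ?? params.provider;

// nijivoice and elevenlabs require concurrency = 1 (per the diff's comment),
// so one such speaker serializes the whole TTS graph; otherwise it runs at 8.
const concurrencyFor = (params: SpeechParams): number =>
  Object.values(params.speakers).some(
    (s) => ["nijivoice", "elevenlabs"].includes(resolveProvider(s, params)),
  ) ? 1 : 8;
```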
package/lib/actions/captions.js
CHANGED
@@ -2,7 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { getHTMLFile } from "../utils/file.js";
 import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 const vanillaAgents = agents.default ?? agents;
 const graph_data = {
   version: 0.5,
@@ -22,7 +22,7 @@ const graph_data = {
       agent: async (namedInputs) => {
         const { beat, context, index } = namedInputs;
         try {
-
+          MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
           const { fileDirs } = namedInputs.context;
           const { caption } = context;
           const { imageDirPath } = fileDirs;
@@ -47,7 +47,7 @@
           return imagePath;
         }
         finally {
-
+          MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, false);
         }
       },
       inputs: {
@@ -64,12 +64,12 @@
 };
 export const captions = async (context) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "caption", true);
     const graph = new GraphAI(graph_data, { ...vanillaAgents });
     graph.injectValue("context", context);
     await graph.run();
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "caption", false);
   }
 };
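`captions.js` adopts the same pattern now used in `audio.js` and `images.js`: set a session (or per-beat) flag, run the graph, and clear the flag in `finally` so it cannot stay stuck on after an error. The package inlines this each time; a hypothetical helper capturing the shape (the wrapper name is ours, not the package's; import paths follow the diff):

```typescript
import { MulmoStudioContextMethods } from "./methods/mulmo_studio_context.js"; // path as in the diff
import type { MulmoStudioContext } from "./types/index.js";

// Hypothetical wrapper around the inlined try/finally pattern from the diff.
const withSessionState = async <T>(
  context: MulmoStudioContext,
  session: string, // "audio" | "image" | "caption" in this release
  fn: () => Promise<T>,
): Promise<T> => {
  MulmoStudioContextMethods.setSessionState(context, session, true);
  try {
    return await fn();
  } finally {
    // Always clears, even when fn throws.
    MulmoStudioContextMethods.setSessionState(context, session, false);
  }
};

// e.g. withSessionState(context, "caption", () => graph.run());
```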
package/lib/actions/images.d.ts
CHANGED
@@ -1,2 +1,3 @@
+import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const images: (context: MulmoStudioContext) => Promise<void>;
+export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js
CHANGED
@@ -15,7 +15,6 @@ const vanillaAgents = agents.default ?? agents;
 dotenv.config();
 // const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
-import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const htmlStyle = (script, beat) => {
   return {
     canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -25,37 +24,37 @@
 const imagePreprocessAgent = async (namedInputs) => {
   const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
   const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
-  if (!imageParams.size) {
-    const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
-    imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
-  }
   const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
   const returnValue = {
-    aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
     imageParams,
+    movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
   };
   if (beat.image) {
     const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
     if (plugin) {
       try {
-
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
         const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
         const path = await plugin.process(processorParams);
         // undefined prompt indicates that image generation is not needed
-        return { path, ...returnValue };
+        return { imagePath: path, ...returnValue };
       }
       finally {
-
+        MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
       }
     }
   }
-
+  // images for "edit_image"
   const images = (() => {
     const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
     const sources = imageNames.map((name) => imageRefs[name]);
     return sources.filter((source) => source !== undefined);
   })();
-
+  if (beat.moviePrompt && !beat.imagePrompt) {
+    return { ...returnValue, images }; // no image prompt, only movie prompt
+  }
+  const prompt = imagePrompt(beat, imageParams.style);
+  return { imagePath, prompt, ...returnValue, images };
 };
 const graph_data = {
   version: 0.5,
@@ -100,64 +99,53 @@
       retry: 3,
       inputs: {
         prompt: ":preprocessor.prompt",
-
+        images: ":preprocessor.images",
+        file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
         text: ":preprocessor.prompt", // only for fileCacheAgentFilter
-        force: ":context.force",
-
-        index: ":__mapIndex", // for
-        sessionType: "image", // for
+        force: ":context.force", // only for fileCacheAgentFilter
+        mulmoContext: ":context", // for fileCacheAgentFilter
+        index: ":__mapIndex", // for fileCacheAgentFilter
+        sessionType: "image", // for fileCacheAgentFilter
         params: {
           model: ":preprocessor.imageParams.model",
-          size: ":preprocessor.imageParams.size",
           moderation: ":preprocessor.imageParams.moderation",
-
-          images: ":preprocessor.images",
+          canvasSize: ":context.studio.script.canvasSize",
         },
       },
       defaultValue: {},
     },
-    prepareMovie: {
-      agent: (namedInputs) => {
-        const { beat, imageDirPath, index, context } = namedInputs;
-        if (beat.moviePrompt) {
-          const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
-          return { movieFile };
-        }
-        return {};
-      },
-      inputs: {
-        result: ":imageGenerator", // to wait for imageGenerator to finish
-        imagePath: ":preprocessor.path",
-        beat: ":beat",
-        imageDirPath: ":imageDirPath",
-        index: ":__mapIndex",
-        context: ":context",
-      },
-    },
     movieGenerator: {
-      if: ":
+      if: ":preprocessor.movieFile",
       agent: "movieGoogleAgent",
       inputs: {
+        onComplete: ":imageGenerator", // to wait for imageGenerator to finish
        prompt: ":beat.moviePrompt",
-        imagePath: ":preprocessor.
-        file: ":
+        imagePath: ":preprocessor.imagePath",
+        file: ":preprocessor.movieFile",
         studio: ":context.studio", // for cache
         index: ":__mapIndex", // for cache
         sessionType: "movie", // for cache
         params: {
           model: ":context.studio.script.movieParams.model",
-          aspectRatio: ":preprocessor.aspectRatio",
           duration: ":beat.duration",
+          canvasSize: ":context.studio.script.canvasSize",
         },
       },
       defaultValue: {},
     },
+    onComplete: {
+      agent: "copyAgent",
+      inputs: {
+        onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+        imageFile: ":preprocessor.imagePath",
+        movieFile: ":preprocessor.movieFile",
+      },
+    },
     output: {
       agent: "copyAgent",
       inputs: {
-
-
-        movieFile: ":prepareMovie.movieFile",
+        imageFile: ":onComplete.imageFile",
+        movieFile: ":onComplete.movieFile",
       },
       isResult: true,
     },
@@ -168,11 +156,26 @@
       agent: (namedInputs) => {
        const { array, context } = namedInputs;
        const { studio } = context;
+        const beatIndexMap = {};
        array.forEach((update, index) => {
          const beat = studio.beats[index];
          studio.beats[index] = { ...beat, ...update };
+          const id = studio.script.beats[index].id;
+          if (id) {
+            beatIndexMap[id] = index;
+          }
+        });
+        studio.beats.forEach((studioBeat, index) => {
+          const beat = studio.script.beats[index];
+          if (beat.image?.type === "beat") {
+            if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
+              studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+            }
+            else if (index > 0) {
+              studioBeat.imageFile = studio.beats[index - 1].imageFile;
+            }
+          }
        });
-        // console.log(namedInputs);
        return { studio };
      },
      inputs: {
@@ -180,7 +183,7 @@
        context: ":context",
      },
    },
-
+    writeOutput: {
      // console: { before: true },
      agent: "fileWriteAgent",
      inputs: {
@@ -191,14 +194,20 @@
   },
 };
 const googleAuth = async () => {
-
-
-
-
-
-
+  try {
+    const auth = new GoogleAuth({
+      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+    });
+    const client = await auth.getClient();
+    const accessToken = await client.getAccessToken();
+    return accessToken.token;
+  }
+  catch (error) {
+    GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
+    throw error;
+  }
 };
-const generateImages = async (context) => {
+const generateImages = async (context, callbacks) => {
   const { studio, fileDirs } = context;
   const { outDirPath, imageDirPath } = fileDirs;
   mkdir(`${imageDirPath}/${studio.filename}`);
@@ -248,7 +257,25 @@ const generateImages = async (context) => {
       throw new Error(`Failed to download image: ${image.source.url}`);
     }
     const buffer = Buffer.from(await response.arrayBuffer());
-
+    // Detect file extension from Content-Type header or URL
+    const extension = (() => {
+      const contentType = response.headers.get("content-type");
+      if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
+        return "jpg";
+      }
+      else if (contentType?.includes("png")) {
+        return "png";
+      }
+      else {
+        // Fall back to URL extension
+        const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
+        if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
+          return urlExtension === "jpeg" ? "jpg" : urlExtension;
+        }
+        return "png"; // default
+      }
+    })();
+    const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
     await fs.promises.writeFile(imagePath, buffer);
     imageRefs[key] = imagePath;
   }
@@ -266,14 +293,19 @@ const generateImages = async (context) => {
   Object.keys(injections).forEach((key) => {
     graph.injectValue(key, injections[key]);
   });
+  if (callbacks) {
+    callbacks.forEach((callback) => {
+      graph.registerCallback(callback);
+    });
+  }
   await graph.run();
 };
-export const images = async (context) => {
+export const images = async (context, callbacks) => {
   try {
-
-    await generateImages(context);
+    MulmoStudioContextMethods.setSessionState(context, "image", true);
+    await generateImages(context, callbacks);
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "image", false);
   }
 };
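Finally, the new download path in `generateImages` chooses a file extension from the response's `Content-Type` header, falls back to the URL suffix, and defaults to `png`. The same logic extracted as a standalone function (the function name is ours; the behavior is copied from the diff):

```typescript
// Content-Type first, then URL suffix, then "png" — as added in images.js.
const detectExtension = (response: Response, url: string): string => {
  const contentType = response.headers.get("content-type");
  if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
    return "jpg";
  }
  if (contentType?.includes("png")) {
    return "png";
  }
  // Fall back to the URL extension.
  const urlExtension = url.split(".").pop()?.toLowerCase();
  if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
    return urlExtension === "jpeg" ? "jpg" : urlExtension;
  }
  return "png"; // default
};
```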