mulmocast 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -3
- package/assets/templates/ghibli_shorts.json +34 -0
- package/assets/templates/trailer.json +25 -0
- package/lib/actions/audio.js +29 -16
- package/lib/actions/captions.js +5 -5
- package/lib/actions/images.js +51 -12
- package/lib/actions/movie.js +46 -13
- package/lib/actions/pdf.js +3 -3
- package/lib/actions/translate.js +15 -15
- package/lib/agents/image_openai_agent.js +6 -3
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
- package/lib/agents/tts_elevenlabs_agent.js +60 -0
- package/lib/agents/tts_google_agent.js +1 -1
- package/lib/agents/tts_nijivoice_agent.js +3 -2
- package/lib/agents/tts_openai_agent.js +1 -1
- package/lib/cli/commands/audio/handler.js +4 -1
- package/lib/cli/commands/image/handler.js +4 -1
- package/lib/cli/commands/movie/handler.js +4 -1
- package/lib/cli/commands/pdf/handler.js +4 -1
- package/lib/cli/commands/translate/handler.js +4 -1
- package/lib/cli/helpers.d.ts +3 -3
- package/lib/cli/helpers.js +38 -20
- package/lib/methods/mulmo_media_source.d.ts +1 -0
- package/lib/methods/mulmo_media_source.js +12 -0
- package/lib/methods/mulmo_script.d.ts +1 -0
- package/lib/methods/mulmo_script.js +9 -0
- package/lib/methods/mulmo_studio_context.d.ts +5 -0
- package/lib/methods/mulmo_studio_context.js +23 -0
- package/lib/types/schema.d.ts +1498 -242
- package/lib/types/schema.js +25 -34
- package/lib/types/type.d.ts +4 -1
- package/lib/utils/file.d.ts +4 -15
- package/lib/utils/file.js +2 -13
- package/lib/utils/filters.js +4 -4
- package/lib/utils/image_plugins/beat.d.ts +4 -0
- package/lib/utils/image_plugins/beat.js +7 -0
- package/lib/utils/image_plugins/index.d.ts +2 -1
- package/lib/utils/image_plugins/index.js +2 -1
- package/lib/utils/image_plugins/source.js +2 -2
- package/lib/utils/preprocess.d.ts +24 -20
- package/lib/utils/preprocess.js +4 -0
- package/package.json +1 -1
- package/scripts/templates/movie_prompts_no_text_template.json +50 -0
package/README.md
CHANGED
@@ -101,9 +101,7 @@ DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
 GOOGLE_PROJECT_ID=your_google_project_id
 ```
 
-
-1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
-2. Login by `gcloud auth application-default login`
+See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
 #### (Optional) For Nijivoice's TTS model
 ```bash

package/assets/templates/ghibli_shorts.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "title": "Ghibli comic style",
+  "description": "Template for Ghibli-style comic presentation.",
+  "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "canvasSize": {
+      "width": 1024,
+      "height": 1536
+    },
+    "speechParams": {
+      "provider": "nijivoice",
+      "speakers": {
+        "Presenter": { "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62", "speechOptions": { "speed": 1.5 } }
+      }
+    },
+    "imageParams": {
+      "style": "<style>Ghibli style</style>",
+      "images": {
+        "presenter": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}

package/assets/templates/trailer.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "title": "Movie Trailer template",
+  "description": "Template for A Movie Trailer.",
+  "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0"
+    },
+    "canvasSize": {
+      "width": 1280,
+      "height": 720
+    },
+    "imageParams": {
+      "style": "<style>Photo realistic, cinematic.</style>"
+    },
+    "audioParams": {
+      "padding": 0.0,
+      "introPadding": 0.0,
+      "closingPadding": 0.0,
+      "outroPadding": 2.5,
+      "bgm": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/bgms/trailer_dramatic.mp3" }
+    }
+  },
+  "scriptName": "movie_prompts_no_text_template.json"
+}
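
Note: the trailer template's `audioParams.bgm` is a media-source object, which the new audio pipeline resolves ahead of the `PATH_BGM` environment variable and the bundled default track (see the `musicFile` injection in `lib/actions/audio.js` below). The resolver added in `lib/methods/mulmo_media_source.js` is not shown in this diff; the sketch below is only an assumption-laden approximation supporting `url` and `path` kinds.

```js
// Hypothetical sketch only: MulmoMediaSourceMethods.resolve's real body is not
// part of this diff. Assumes media sources are { kind: "url", url } or
// { kind: "path", path }, and that paths resolve against a project directory.
import path from "node:path";

const resolveMediaSource = (source, context) => {
  if (!source) return undefined;
  if (source.kind === "url") return source.url; // remote assets pass through as-is
  if (source.kind === "path") return path.resolve(context?.baseDirPath ?? process.cwd(), source.path);
  return undefined;
};

// Mirrors the fallback chain injected into the audio graph:
// resolveMediaSource(script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath
```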
package/lib/actions/audio.js
CHANGED
@@ -6,12 +6,14 @@ import addBGMAgent from "../agents/add_bgm_agent.js";
 import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
 import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
 import ttsGoogleAgent from "../agents/tts_google_agent.js";
+import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
-import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath,
+import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
 const vanillaAgents = agents.default ?? agents;
 // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
 // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -19,10 +21,11 @@ const provider_to_agent = {
   nijivoice: "ttsNijivoiceAgent",
   openai: "ttsOpenaiAgent",
   google: "ttsGoogleAgent",
+  elevenlabs: "ttsElevenlabsAgent",
 };
 const getAudioPath = (context, beat, audioFile, audioDirPath) => {
   if (beat.audio?.type === "audio") {
-    const path =
+    const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
     if (path) {
       return path;
     }
@@ -34,18 +37,21 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
   return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
 };
 const preprocessor = (namedInputs) => {
-  const { beat, studioBeat, multiLingual,
+  const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
   const { lang } = context;
-  const
+  const speaker = context.studio.script.speechParams.speakers[beat.speaker];
+  const voiceId = speaker.voiceId;
   const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
   const text = localizedText(beat, multiLingual, lang);
-
-  const
+  // Use speaker-specific provider if available, otherwise fall back to script-level provider
+  const provider = speaker.provider ?? context.studio.script.speechParams.provider;
+  const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
+  const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
   const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
   studioBeat.audioFile = audioPath;
   const needsTTS = !beat.audio && audioPath !== undefined;
   return {
-    ttsAgent: provider_to_agent[
+    ttsAgent: provider_to_agent[provider],
     studioBeat,
     voiceId,
     speechOptions,
@@ -62,7 +68,6 @@ const graph_tts = {
       beat: ":beat",
       studioBeat: ":studioBeat",
       multiLingual: ":multiLingual",
-      index: ":__mapIndex",
       context: ":context",
       audioDirPath: ":audioDirPath",
     },
@@ -74,7 +79,7 @@
       text: ":preprocessor.text",
       file: ":preprocessor.audioPath",
      force: ":context.force",
-
+      mulmoContext: ":context", // for cache
       index: ":__mapIndex", // for cache
       sessionType: "audio", // for cache
       params: {
@@ -96,6 +101,7 @@ const graph_data = {
     outputStudioFilePath: {},
     audioDirPath: {},
     audioSegmentDirPath: {},
+    musicFile: {},
     map: {
       agent: "mapAgent",
       inputs: {
@@ -130,14 +136,14 @@
     },
     addBGM: {
       agent: "addBGMAgent",
-      params: {
-        musicFile: process.env.PATH_BGM ?? defaultBGMPath,
-      },
       inputs: {
         wait: ":combineFiles",
         voiceFile: ":audioCombinedFilePath",
         outputFile: ":audioArtifactFilePath",
         script: ":context.studio.script",
+        params: {
+          musicFile: ":musicFile",
+        },
       },
       isResult: true,
     },
@@ -162,7 +168,7 @@ const agentFilters = [
 ];
 export const audio = async (context, callbacks) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "audio", true);
     const { studio, fileDirs, lang } = context;
     const { outDirPath, audioDirPath } = fileDirs;
     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -171,13 +177,19 @@
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
     mkdir(outDirPath);
     mkdir(audioSegmentDirPath);
-
+    // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+    const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
+      const provider = speaker.provider ?? studio.script.speechParams.provider;
+      return provider === "nijivoice" || provider === "elevenlabs";
+    });
+    graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
     const graph = new GraphAI(graph_data, {
       ...vanillaAgents,
       fileWriteAgent,
       ttsOpenaiAgent,
       ttsNijivoiceAgent,
       ttsGoogleAgent,
+      ttsElevenlabsAgent,
       addBGMAgent,
       combineAudioFilesAgent,
     }, { agentFilters });
@@ -187,6 +199,7 @@
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
     graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
     graph.injectValue("audioDirPath", audioDirPath);
+    graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath);
     if (callbacks) {
       callbacks.forEach((callback) => {
         graph.registerCallback(callback);
@@ -196,6 +209,6 @@
     writingMessage(audioCombinedFilePath);
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "audio", false);
   }
 };
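
The preprocessor rewrite above adds per-speaker provider overrides and folds voice, speed, instruction, and provider into the cached segment's filename hash. A condensed, self-contained sketch of that logic (`resolveTtsTask` is a hypothetical wrapper, and sha256 stands in for the package's `text2hash`):

```js
// Condensed sketch of the provider fallback and cache-key logic added to
// lib/actions/audio.js. resolveTtsTask is hypothetical; the real code lives
// inside the graph's preprocessor node.
import { createHash } from "node:crypto";

const text2hash = (text) => createHash("sha256").update(text).digest("hex");

const resolveTtsTask = (script, beat, { text, lang, filename }) => {
  const speaker = script.speechParams.speakers[beat.speaker];
  // Speaker-level provider wins; otherwise fall back to the script-level one.
  const provider = speaker.provider ?? script.speechParams.provider;
  const speechOptions = speaker.speechOptions;
  // Everything that changes the rendered audio goes into the hash, so a new
  // voice, speed, instruction, or provider invalidates the cached segment.
  const hashInput = `${text}${speaker.voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
  const audioFile = `${filename}_${text2hash(hashInput)}` + (lang ? `_${lang}` : "");
  return { provider, voiceId: speaker.voiceId, audioFile };
};
```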
package/lib/actions/captions.js
CHANGED
@@ -2,7 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { getHTMLFile } from "../utils/file.js";
 import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 const vanillaAgents = agents.default ?? agents;
 const graph_data = {
   version: 0.5,
@@ -22,7 +22,7 @@
       agent: async (namedInputs) => {
         const { beat, context, index } = namedInputs;
         try {
-
+          MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
           const { fileDirs } = namedInputs.context;
           const { caption } = context;
           const { imageDirPath } = fileDirs;
@@ -47,7 +47,7 @@
           return imagePath;
         }
         finally {
-
+          MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, false);
         }
       },
       inputs: {
@@ -64,12 +64,12 @@
 };
 export const captions = async (context) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "caption", true);
     const graph = new GraphAI(graph_data, { ...vanillaAgents });
     graph.injectValue("context", context);
     await graph.run();
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "caption", false);
   }
 };
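
This `setSessionState` / `setBeatSessionState` try/finally bookkeeping now brackets every action in this release (audio, captions, images, movie, pdf, translate). `lib/methods/mulmo_studio_context.js` (+23 lines) is not included in this diff, so the following is only a plausible shape, assuming the flags live on the context for progress reporting:

```js
// Sketch under assumptions: the real MulmoStudioContextMethods is not shown
// in this diff. Assumes session flags are stored on context.sessionState.
const MulmoStudioContextMethodsSketch = {
  setSessionState(context, sessionType, value) {
    context.sessionState ??= { inSession: {}, inBeatSession: {} };
    context.sessionState.inSession[sessionType] = value;
  },
  setBeatSessionState(context, sessionType, index, value) {
    context.sessionState ??= { inSession: {}, inBeatSession: {} };
    context.sessionState.inBeatSession[sessionType] ??= {};
    context.sessionState.inBeatSession[sessionType][index] = value;
  },
};

// Callers wrap work so the flag is cleared even when the action throws:
const withSession = async (context, sessionType, fn) => {
  try {
    MulmoStudioContextMethodsSketch.setSessionState(context, sessionType, true);
    return await fn();
  } finally {
    MulmoStudioContextMethodsSketch.setSessionState(context, sessionType, false);
  }
};
```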
package/lib/actions/images.js
CHANGED
@@ -15,7 +15,6 @@ const vanillaAgents = agents.default ?? agents;
 dotenv.config();
 // const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
-import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const htmlStyle = (script, beat) => {
   return {
     canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -34,14 +33,14 @@ const imagePreprocessAgent = async (namedInputs) => {
   const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
   if (plugin) {
     try {
-
+      MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
       const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
       const path = await plugin.process(processorParams);
       // undefined prompt indicates that image generation is not needed
       return { imagePath: path, ...returnValue };
     }
     finally {
-
+      MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
     }
   }
 }
@@ -104,7 +103,7 @@
       file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
       text: ":preprocessor.prompt", // only for fileCacheAgentFilter
       force: ":context.force", // only for fileCacheAgentFilter
-
+      mulmoContext: ":context", // for fileCacheAgentFilter
       index: ":__mapIndex", // for fileCacheAgentFilter
       sessionType: "image", // for fileCacheAgentFilter
       params: {
@@ -134,13 +133,20 @@
     },
     defaultValue: {},
   },
-
+  onComplete: {
     agent: "copyAgent",
     inputs: {
-      onComplete: ":movieGenerator",
+      onComplete: ":movieGenerator", // to wait for movieGenerator to finish
       imageFile: ":preprocessor.imagePath",
       movieFile: ":preprocessor.movieFile",
     },
+  },
+  output: {
+    agent: "copyAgent",
+    inputs: {
+      imageFile: ":onComplete.imageFile",
+      movieFile: ":onComplete.movieFile",
+    },
     isResult: true,
   },
 },
@@ -150,11 +156,26 @@
     agent: (namedInputs) => {
       const { array, context } = namedInputs;
       const { studio } = context;
+      const beatIndexMap = {};
       array.forEach((update, index) => {
         const beat = studio.beats[index];
         studio.beats[index] = { ...beat, ...update };
+        const id = studio.script.beats[index].id;
+        if (id) {
+          beatIndexMap[id] = index;
+        }
+      });
+      studio.beats.forEach((studioBeat, index) => {
+        const beat = studio.script.beats[index];
+        if (beat.image?.type === "beat") {
+          if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
+            studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+          }
+          else if (index > 0) {
+            studioBeat.imageFile = studio.beats[index - 1].imageFile;
+          }
+        }
       });
-      // console.log(namedInputs);
       return { studio };
     },
     inputs: {
@@ -181,9 +202,9 @@ const googleAuth = async () => {
     const accessToken = await client.getAccessToken();
     return accessToken.token;
   }
-  catch (
+  catch (error) {
     GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
-
+    throw error;
   }
 };
 const generateImages = async (context, callbacks) => {
@@ -236,7 +257,25 @@
       throw new Error(`Failed to download image: ${image.source.url}`);
     }
     const buffer = Buffer.from(await response.arrayBuffer());
-
+    // Detect file extension from Content-Type header or URL
+    const extension = (() => {
+      const contentType = response.headers.get("content-type");
+      if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
+        return "jpg";
+      }
+      else if (contentType?.includes("png")) {
+        return "png";
+      }
+      else {
+        // Fall back to URL extension
+        const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
+        if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
+          return urlExtension === "jpeg" ? "jpg" : urlExtension;
+        }
+        return "png"; // default
+      }
+    })();
+    const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
     await fs.promises.writeFile(imagePath, buffer);
     imageRefs[key] = imagePath;
   }
@@ -263,10 +302,10 @@
 };
 export const images = async (context, callbacks) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "image", true);
     await generateImages(context, callbacks);
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "image", false);
   }
 };
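
The fan-in agent above resolves the new `beat` image type in two passes: it first maps each script beat's `id` to its index, then lets a `{ type: "beat" }` image reuse the referenced beat's generated file, or the previous beat's file when no `id` is given. A hypothetical script fragment exercising both forms:

```js
// Hypothetical script fragment: beat ids and URLs are invented for illustration.
const script = {
  beats: [
    { id: "opening", text: "...", image: { type: "image", source: { kind: "url", url: "https://example.com/a.png" } } },
    { text: "...", image: { type: "beat" } }, // no id: reuses the previous beat's image
    { text: "...", image: { type: "beat", id: "opening" } }, // reuses the "opening" beat's image
  ],
};
```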
package/lib/actions/movie.js
CHANGED
@@ -1,8 +1,9 @@
-import { GraphAILogger } from "graphai";
+import { GraphAILogger, assert } from "graphai";
+import { mulmoTransitionSchema } from "../types/index.js";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
 export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
@@ -38,10 +39,10 @@ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
     `[${audioId}]`,
   };
 };
-const getOutputOption = (audioId) => {
+const getOutputOption = (audioId, videoId) => {
   return [
     "-preset medium", // Changed from veryfast to medium for better compression
-
+    `-map [${videoId}]`, // Map the video stream
     `-map ${audioId}`, // Map the audio stream
     `-c:v ${videoCodec}`, // Set video codec
     ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
@@ -61,14 +62,17 @@
 const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
   const start = performance.now();
   const ffmpegContext = FfmpegContextInit();
-
-
-
+  const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+  if (missingIndex !== -1) {
+    GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
+    return false;
   }
   const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
   // Add each image input
   const filterComplexVideoIds = [];
   const filterComplexAudioIds = [];
+  const transitionVideoIds = [];
+  const beatTimestamps = [];
   studio.beats.reduce((timestamp, studioBeat, index) => {
     const beat = studio.script.beats[index];
     const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
@@ -102,16 +106,43 @@
     else {
       filterComplexVideoIds.push(videoId);
     }
+    if (studio.script.movieParams?.transition && index < studio.beats.length - 1) {
+      const sourceId = filterComplexVideoIds.pop();
+      ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
+      filterComplexVideoIds.push(`${sourceId}_0`);
+      transitionVideoIds.push(`${sourceId}_1`);
+    }
     if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
       const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
       filterComplexAudioIds.push(audioId);
       ffmpegContext.filterComplex.push(audioPart);
     }
+    beatTimestamps.push(timestamp);
     return timestamp + duration;
   }, 0);
+  assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
+  assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
   // console.log("*** images", images.audioIds);
   // Concatenate the trimmed images
-
+  const concatVideoId = "concat_video";
+  ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+  // Add tranditions if needed
+  const mixedVideoId = (() => {
+    if (studio.script.movieParams?.transition && transitionVideoIds.length > 1) {
+      const transition = mulmoTransitionSchema.parse(studio.script.movieParams.transition);
+      return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
+        const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
+        const processedVideoId = `${transitionVideoId}_f`;
+        // TODO: This mechanism does not work for video beats yet. It works only with image beats.
+        // If we can to add other transition types than fade, we need to add them here.
+        ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`);
+        const outputId = `${transitionVideoId}_o`;
+        ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+        return outputId;
+      }, concatVideoId);
+    }
+    return concatVideoId;
+  })();
   const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
   const artifactAudioId = `${audioIndex}:a`;
   const ffmpegContextAudioId = (() => {
@@ -125,23 +156,25 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
     }
     return artifactAudioId;
   })();
-  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
+  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
   const end = performance.now();
   GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
   GraphAILogger.info(studio.script.title);
   GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+  return true;
 };
 export const movie = async (context) => {
-
+  MulmoStudioContextMethods.setSessionState(context, "video", true);
   try {
     const { studio, fileDirs, caption } = context;
     const { outDirPath } = fileDirs;
     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
     const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
-    await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)
-
+    if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+      writingMessage(outputVideoPath);
+    }
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "video", false);
  }
 };
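
The transition code above splits each non-final beat's stream so one copy feeds the plain `concat` while the other is alpha-faded and overlaid across the cut. A trimmed re-derivation of the two filter lines produced per cut (stream labels and the 0.05 s flicker guard follow the diff); running it prints the filters for a cut at t=4.0 with a 0.5 s fade:

```js
// Re-derivation of the per-cut fade filters from lib/actions/movie.js.
// concatId: output of the concat filter; transitionId: the split-off copy
// of the beat that should fade out across the cut.
const buildFadeTransition = (concatId, transitionId, cutTime, duration) => {
  const startTime = cutTime - 0.05; // same flicker-avoidance offset as the diff
  const fadedId = `${transitionId}_f`;
  const outId = `${transitionId}_o`;
  return [
    `[${transitionId}]format=yuva420p,fade=t=out:d=${duration}:alpha=1,setpts=PTS-STARTPTS+${startTime}/TB[${fadedId}]`,
    `[${concatId}][${fadedId}]overlay=enable='between(t,${startTime},${startTime + duration})'[${outId}]`,
  ];
};

console.log(buildFadeTransition("concat_video", "v0_1", 4.0, 0.5).join("\n"));
```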
package/lib/actions/pdf.js
CHANGED
@@ -6,7 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
 import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 const imagesPerPage = 4;
 const offset = 10;
 const handoutImageRatio = 0.5;
@@ -224,10 +224,10 @@ const generatePdf = async (context, pdfMode, pdfSize) => {
 };
 export const pdf = async (context, pdfMode, pdfSize) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "pdf", true);
     await generatePdf(context, pdfMode, pdfSize);
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "pdf", false);
   }
 };
package/lib/actions/translate.js
CHANGED
@@ -6,19 +6,19 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
 import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
 import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 const vanillaAgents = agents.default ?? agents;
 const translateGraph = {
   version: 0.5,
   nodes: {
-
+    context: {},
     defaultLang: {},
     outDirPath: {},
     outputStudioFilePath: {},
     lang: {
       agent: "stringUpdateTextAgent",
       inputs: {
-        newText: ":studio.script.lang",
+        newText: ":context.studio.script.lang",
         oldText: ":defaultLang",
       },
     },
@@ -27,15 +27,15 @@ const translateGraph = {
       isResult: true,
       agent: "mergeObjectAgent",
      inputs: {
-        items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
+        items: [":context.studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
       },
     },
     beatsMap: {
       agent: "mapAgent",
       inputs: {
         targetLangs: ":targetLangs",
-
-        rows: ":studio.script.beats",
+        context: ":context",
+        rows: ":context.studio.script.beats",
         lang: ":lang",
       },
       params: {
@@ -52,7 +52,7 @@
         },
         inputs: {
           index: ":__mapIndex",
-          rows: ":studio.multiLingual",
+          rows: ":context.studio.multiLingual",
         },
       },
       preprocessMultiLingual: {
@@ -62,7 +62,7 @@
           multiLingual: ":multiLingual",
           rows: ":targetLangs",
           lang: ":lang.text",
-
+          context: ":context",
          beatIndex: ":__mapIndex",
         },
         params: {
@@ -79,7 +79,7 @@
           multiLingual: ":multiLingual", // for cache
           lang: ":lang", // for cache
           beatIndex: ":beatIndex", // for cache
-
+          mulmoContext: ":context", // for cache
           system: translateSystemPrompt,
           prompt: translatePrompts,
         },
@@ -175,7 +175,7 @@
 };
 const localizedTextCacheAgentFilter = async (context, next) => {
   const { namedInputs } = context;
-  const {
+  const { mulmoContext, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
   if (!beat.text) {
     return { text: "" };
   }
@@ -192,11 +192,11 @@ const localizedTextCacheAgentFilter = async (context, next) => {
     return { text: beat.text };
   }
   try {
-
+    MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
     return await next(context);
   }
   finally {
-
+    MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, false);
   }
 };
 const agentFilters = [
@@ -210,14 +210,14 @@ const defaultLang = "en";
 const targetLangs = ["ja", "en"];
 export const translate = async (context, callbacks) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
     const { studio, fileDirs } = context;
     const { outDirPath } = fileDirs;
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
     mkdir(outDirPath);
     assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
-    graph.injectValue("
+    graph.injectValue("context", context);
     graph.injectValue("defaultLang", defaultLang);
     graph.injectValue("targetLangs", targetLangs);
     graph.injectValue("outDirPath", outDirPath);
@@ -234,6 +234,6 @@ export const translate = async (context, callbacks) => {
     }
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
   }
 };
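
`localizedTextCacheAgentFilter` follows GraphAI's agent-filter contract: it may return early (the cached-translation paths above) or wrap `next(context)` with per-beat session flags. A minimal sketch of that shape, not the package's exact filter:

```js
// Sketch of the agent-filter pattern; setBeatState stands in for
// MulmoStudioContextMethods.setBeatSessionState, and the cache-hit shape
// below is hypothetical.
const makeBeatSessionFilter = (setBeatState) => async (context, next) => {
  const { mulmoContext, beat, beatIndex, multiLingual } = context.namedInputs;
  if (!beat.text) {
    return { text: "" }; // nothing to translate: short-circuit without calling the LLM
  }
  if (multiLingual?.[beatIndex]?.cacheKey === beat.text) {
    return { text: multiLingual[beatIndex].text }; // hypothetical cache-hit shape
  }
  try {
    setBeatState(mulmoContext, "multiLingual", beatIndex, true);
    return await next(context); // run the wrapped agent (e.g. openAIAgent)
  } finally {
    setBeatState(mulmoContext, "multiLingual", beatIndex, false);
  }
};
```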