mulmocast 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/ani.json +48 -0
- package/assets/templates/ani_ja.json +45 -0
- package/lib/actions/audio.js +2 -0
- package/lib/actions/image_agents.d.ts +28 -22
- package/lib/actions/image_agents.js +4 -4
- package/lib/actions/images.js +12 -21
- package/lib/actions/translate.d.ts +4 -1
- package/lib/actions/translate.js +6 -3
- package/lib/agents/combine_audio_files_agent.js +106 -86
- package/lib/agents/movie_replicate_agent.js +4 -3
- package/lib/agents/tts_elevenlabs_agent.js +1 -1
- package/lib/agents/tts_nijivoice_agent.js +2 -3
- package/lib/cli/commands/tool/scripting/builder.js +1 -1
- package/lib/cli/commands/tool/scripting/handler.d.ts +1 -1
- package/lib/cli/commands/tool/story_to_script/builder.js +1 -1
- package/lib/cli/commands/tool/story_to_script/handler.d.ts +1 -1
- package/lib/index.browser.d.ts +1 -3
- package/lib/index.browser.js +2 -4
- package/lib/index.common.d.ts +2 -0
- package/lib/index.common.js +3 -0
- package/lib/index.node.d.ts +7 -0
- package/lib/index.node.js +8 -0
- package/lib/methods/mulmo_presentation_style.d.ts +15 -1
- package/lib/methods/mulmo_presentation_style.js +10 -11
- package/lib/tools/story_to_script.d.ts +1 -1
- package/lib/types/schema.d.ts +343 -322
- package/lib/types/schema.js +21 -14
- package/lib/types/type.d.ts +3 -2
- package/lib/utils/context.d.ts +73 -72
- package/lib/utils/ffmpeg_utils.js +6 -0
- package/lib/utils/image_plugins/image.d.ts +2 -2
- package/lib/utils/image_plugins/movie.d.ts +2 -2
- package/lib/utils/preprocess.d.ts +37 -36
- package/lib/utils/provider2agent.d.ts +9 -7
- package/lib/utils/provider2agent.js +12 -7
- package/lib/utils/utils.d.ts +1 -2
- package/lib/utils/utils.js +7 -2
- package/package.json +11 -11
- package/scripts/templates/presentation.json~ +0 -119
package/assets/templates/ani.json ADDED
@@ -0,0 +1,48 @@
+{
+  "title": "Presentation with Ani in Japanese",
+  "description": "Template for presentation with Ani in Japanese.",
+  "systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "movieParams": {
+      "provider": "replicate",
+      "model": "bytedance/seedance-1-lite"
+    },
+    "speechParams": {
+      "provider": "openai",
+      "speakers": {
+        "Presenter": {
+          "voiceId": "shimmer",
+          "speechOptions": { "instruction": "Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl." }
+        }
+      }
+    },
+    "audioParams": {
+      "bgm": {
+        "kind": "url",
+        "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
+      }
+    },
+    "lang": "en",
+    "canvasSize": {
+      "width": 1024,
+      "height": 1536
+    },
+    "imageParams": {
+      "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
+      "images": {
+        "ani": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}
package/assets/templates/ani_ja.json ADDED
@@ -0,0 +1,45 @@
+{
+  "title": "Presentation with Ani",
+  "description": "Template for presentation with Ani.",
+  "systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
+  "presentationStyle": {
+    "$mulmocast": {
+      "version": "1.0",
+      "credit": "closing"
+    },
+    "movieParams": {
+      "provider": "replicate",
+      "model": "bytedance/seedance-1-lite"
+    },
+    "audioParams": {
+      "bgm": {
+        "kind": "url",
+        "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
+      }
+    },
+    "lang": "ja",
+    "canvasSize": {
+      "width": 1024,
+      "height": 1536
+    },
+    "speechParams": {
+      "provider": "nijivoice",
+      "speakers": {
+        "Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
+      }
+    },
+    "imageParams": {
+      "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
+      "images": {
+        "ani": {
+          "type": "image",
+          "source": {
+            "kind": "url",
+            "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
+          }
+        }
+      }
+    }
+  },
+  "scriptName": "image_prompts_template.json"
+}
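Both templates share the same top-level shape: a systemPrompt for the script-writing LLM, a presentationStyle block, and a scriptName naming the beat template to fill in. As a minimal TypeScript sketch of how such a file could be consumed (the loadTemplate helper is illustrative, not a mulmocast export):

    import { readFileSync } from "node:fs";

    // Illustrative only: the field names follow the JSON above, but this
    // helper is an assumption, not part of the package's API.
    const loadTemplate = (path: string) => {
        const template = JSON.parse(readFileSync(path, "utf-8"));
        return {
            systemPrompt: template.systemPrompt as string, // instructions for the script-writing LLM
            presentationStyle: template.presentationStyle, // speech/movie/image/audio parameters
            scriptName: template.scriptName as string,     // e.g. "image_prompts_template.json"
        };
    };

    const aniJa = loadTemplate("package/assets/templates/ani_ja.json");
    console.log(aniJa.presentationStyle.speechParams.provider); // "nijivoice"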
package/lib/actions/audio.js CHANGED
@@ -143,6 +143,7 @@ const graph_data = {
         },
         addBGM: {
             agent: "addBGMAgent",
+            unless: ":context.presentationStyle.audioParams.bgmVolume.equal(0)",
             inputs: {
                 wait: ":combineFiles",
                 voiceFile: ":audioCombinedFilePath",
@@ -153,6 +154,7 @@ const graph_data = {
                 },
             },
             isResult: true,
+            defaultValue: {},
         },
         title: {
             agent: "copyAgent",
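The two added lines follow GraphAI's conditional-node pattern: unless skips the addBGM node when the expression is truthy (here, when bgmVolume equals 0), and defaultValue is what downstream nodes receive when a node is skipped. A minimal sketch of the same pattern under assumed GraphAI semantics (the graph content is hypothetical, not this package's graph):

    // Hypothetical graph: a node guarded by `unless`, with a fallback value.
    const graphData = {
        version: 0.5,
        nodes: {
            volume: { value: 0 },
            addBGM: {
                agent: "copyAgent",
                unless: ":volume.equal(0)", // skip BGM mixing when the volume is zero
                inputs: { mixed: "voice+bgm" },
                defaultValue: {},           // dependents still get a value when skipped
                isResult: true,
            },
        },
    };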
package/lib/actions/image_agents.d.ts CHANGED
@@ -44,17 +44,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
 } | {
     imagePath: string;
     imageFromMovie: boolean;
-    [… 11 lines omitted in the source rendering]
+    movieAgentInfo: {
+        agent: string;
+        movieParams: {
+            speed?: number | undefined;
+            provider?: string | undefined;
+            model?: string | undefined;
+            fillOption?: {
+                style: "aspectFit" | "aspectFill";
+            } | undefined;
+            transition?: {
+                type: "fade" | "slideout_left";
+                duration: number;
+            } | undefined;
+        };
     };
     imageParams: {
         provider: string;
@@ -91,17 +94,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
     imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
     prompt: string;
     referenceImages: string[];
-    [… 11 lines omitted in the source rendering]
+    movieAgentInfo: {
+        agent: string;
+        movieParams: {
+            speed?: number | undefined;
+            provider?: string | undefined;
+            model?: string | undefined;
+            fillOption?: {
+                style: "aspectFit" | "aspectFill";
+            } | undefined;
+            transition?: {
+                type: "fade" | "slideout_left";
+                duration: number;
+            } | undefined;
+        };
     };
     imageParams: {
         provider: string;
package/lib/actions/image_agents.js CHANGED
@@ -28,15 +28,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
         // undefined prompt indicates that image generation is not needed
         return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
     }
-    const …
-    GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
+    const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
+    GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
     if (beat.moviePrompt && !beat.imagePrompt) {
-        return { ...returnValue, imagePath, imageFromMovie: true, …
+        return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
     }
     // referenceImages for "edit_image", openai agent.
     const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
     const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
-    return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, …
+    return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieAgentInfo };
 };
 export const imagePluginAgent = async (namedInputs) => {
     const { context, beat, index } = namedInputs;
package/lib/actions/images.js CHANGED
@@ -39,7 +39,6 @@ const beat_graph_data = {
     nodes: {
         context: {},
         htmlImageAgentInfo: {},
-        movieAgentInfo: {},
         imageRefs: {},
         beat: {},
         __mapIndex: {},
@@ -134,7 +133,7 @@ const beat_graph_data = {
         },
         movieGenerator: {
            if: ":preprocessor.movieFile",
-            agent: ":movieAgentInfo.agent",
+            agent: ":preprocessor.movieAgentInfo.agent",
            inputs: {
                onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
                prompt: ":beat.moviePrompt",
@@ -147,7 +146,7 @@ const beat_graph_data = {
                mulmoContext: ":context",
            },
            params: {
-                model: ":preprocessor.movieParams.model",
+                model: ":preprocessor.movieAgentInfo.movieParams.model",
                duration: ":beat.duration",
                canvasSize: ":context.presentationStyle.canvasSize",
            },
@@ -167,16 +166,19 @@ const beat_graph_data = {
            defaultValue: {},
        },
        audioChecker: {
-            if: ":preprocessor.movieFile",
            agent: async (namedInputs) => {
-                const …
+                const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
+                if (!sourceFile) {
+                    return { hasMovieAudio: false };
+                }
+                const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
                return { hasMovieAudio: hasAudio };
            },
            inputs: {
-                onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
+                onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
                movieFile: ":preprocessor.movieFile",
+                imageFile: ":preprocessor.imagePath",
            },
-            defaultValue: {},
        },
        output: {
            agent: "copyAgent",
@@ -201,7 +203,6 @@ const graph_data = {
     nodes: {
         context: {},
         htmlImageAgentInfo: {},
-        movieAgentInfo: {},
         outputStudioFilePath: {},
         imageRefs: {},
         map: {
@@ -210,7 +211,6 @@ const graph_data = {
             rows: ":context.studio.script.beats",
             context: ":context",
             htmlImageAgentInfo: ":htmlImageAgentInfo",
-            movieAgentInfo: ":movieAgentInfo",
             imageRefs: ":imageRefs",
         },
         isResult: true,
@@ -293,17 +293,11 @@ export const graphOption = async (context, settings) => {
     const config = settings2GraphAIConfig(settings, process.env);
     // We need to get google's auth token only if the google is the text2image provider.
     if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
-        userAssert(!! …
+        userAssert(!!config.movieGoogleAgent || !!config.imageGoogleAgent, "GOOGLE_PROJECT_ID is not set");
         GraphAILogger.log("google was specified as text2image engine");
         const token = await googleAuth();
-        config["imageGoogleAgent"] = …
-        [… 1 line omitted in the source rendering]
-            token,
-        };
-        config["movieGoogleAgent"] = {
-            projectId: process.env.GOOGLE_PROJECT_ID,
-            token,
-        };
+        config["imageGoogleAgent"].token = token;
+        config["movieGoogleAgent"].token = token;
     }
     options.config = config;
     return options;
@@ -320,9 +314,6 @@ const prepareGenerateImages = async (context) => {
     const injections = {
         context,
         htmlImageAgentInfo,
-        movieAgentInfo: {
-            agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
-        },
         outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
         imageRefs,
     };
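Note the graphOption change above: settings2GraphAIConfig is now expected to have created the per-agent config entries (including imageGoogleAgent and movieGoogleAgent when GOOGLE_PROJECT_ID is available), and graphOption merely patches the auth token onto them. A rough self-contained sketch of that flow; the types and the buildConfig stand-in are inferred from usage here, not the real helpers:

    type GoogleAgentConfig = { projectId?: string; token?: string };
    type GraphAIConfig = {
        imageGoogleAgent?: GoogleAgentConfig;
        movieGoogleAgent?: GoogleAgentConfig;
    };

    // Stand-in for settings2GraphAIConfig: only creates the google entries
    // when a project id is present (inferred behavior; the real mapping
    // lives in lib/utils/utils.js and is not shown in this diff).
    const buildConfig = (env: NodeJS.ProcessEnv): GraphAIConfig =>
        env.GOOGLE_PROJECT_ID
            ? {
                  imageGoogleAgent: { projectId: env.GOOGLE_PROJECT_ID },
                  movieGoogleAgent: { projectId: env.GOOGLE_PROJECT_ID },
              }
            : {};

    const config = buildConfig(process.env);
    if (config.imageGoogleAgent && config.movieGoogleAgent) {
        const token = "ya29.…"; // in the real code this comes from googleAuth()
        config.imageGoogleAgent.token = token;
        config.movieGoogleAgent.token = token;
    }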
package/lib/actions/translate.d.ts CHANGED
@@ -1,4 +1,7 @@
 import "dotenv/config";
 import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const translate: (context: MulmoStudioContext, …
+export declare const translate: (context: MulmoStudioContext, args?: {
+    callbacks?: CallbackFunction[];
+    settings?: Record<string, string>;
+}) => Promise<void>;
package/lib/actions/translate.js CHANGED
@@ -4,6 +4,7 @@ import * as agents from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
+import { settings2GraphAIConfig } from "../utils/utils.js";
 import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
 import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -208,15 +209,17 @@ const agentFilters = [
 ];
 const defaultLang = "en";
 const targetLangs = ["ja", "en"];
-export const translate = async (context, …
+export const translate = async (context, args) => {
+    const { settings, callbacks } = args ?? {};
     try {
         MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
         const fileName = MulmoStudioContextMethods.getFileName(context);
         const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
         const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
         mkdir(outDirPath);
-        [… 2 lines omitted in the source rendering]
+        const config = settings2GraphAIConfig(settings, process.env);
+        assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
+        const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
         graph.injectValue("context", context);
         graph.injectValue("defaultLang", defaultLang);
         graph.injectValue("targetLangs", targetLangs);
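Read together with the translate.d.ts hunk above, the second argument is now a single optional object; settings (when given) is mapped through settings2GraphAIConfig with process.env as the fallback. A hypothetical call-site sketch (the package-root import paths are assumptions):

    import { translate } from "mulmocast"; // import path assumed
    import type { MulmoStudioContext } from "mulmocast"; // type export assumed

    const runTranslation = async (context: MulmoStudioContext) => {
        await translate(context); // no args: config falls back to process.env
        await translate(context, {
            settings: { OPENAI_API_KEY: "sk-…" }, // merged with process.env via settings2GraphAIConfig
        });
    };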
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -1,6 +1,6 @@
 import { assert, GraphAILogger } from "graphai";
 import { silent60secPath } from "../utils/file.js";
-import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
+import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration, } from "../utils/ffmpeg_utils.js";
 import { userAssert } from "../utils/utils.js";
 const getMovieDuration = async (beat) => {
     if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
@@ -62,6 +62,93 @@ const getGroupBeatDurations = (context, group, audioDuration) => {
     });
     return durations;
 };
+const getInputIds = (context, mediaDurations, ffmpegContext, silentIds) => {
+    const inputIds = [];
+    context.studio.beats.forEach((studioBeat, index) => {
+        const { silenceDuration } = mediaDurations[index];
+        const paddingId = `[padding_${index}]`;
+        if (studioBeat.audioFile) {
+            const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
+            inputIds.push(audioId);
+        }
+        if (silenceDuration > 0) {
+            const silentId = silentIds.pop();
+            ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
+            inputIds.push(paddingId);
+        }
+    });
+    return inputIds;
+};
+const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations, groupLength) => {
+    return (remaining, idx, iGroup) => {
+        const subBeatDurations = mediaDurations[idx];
+        userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
+        if (iGroup === groupLength - 1) {
+            beatDurations.push(remaining);
+            subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
+            return 0;
+        }
+        const nextBeat = context.studio.script.beats[idx + 1];
+        assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
+        const voiceStartAt = nextBeat.image?.startAt;
+        if (voiceStartAt) {
+            const remainingDuration = movieDuration - voiceStartAt;
+            const duration = remaining - remainingDuration;
+            userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
+            beatDurations.push(duration);
+            subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
+            userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
+            return remainingDuration;
+        }
+        beatDurations.push(subBeatDurations.audioDuration);
+        return remaining - subBeatDurations.audioDuration;
+    };
+};
+const getVoiceOverGroup = (context, index) => {
+    const group = [index];
+    for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
+        group.push(i);
+    }
+    return group;
+};
+const getSpillOverGroup = (context, mediaDurations, index) => {
+    const group = [index];
+    for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
+        group.push(i);
+    }
+    return group;
+};
+const spilledOverAudio = (context, group, audioDuration, beatDurations, mediaDurations) => {
+    const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
+    // Yes, the current beat has spilled over audio.
+    const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
+    if (beatsTotalDuration > audioDuration + 0.01) {
+        // 0.01 is a tolerance to avoid floating point precision issues
+        group.reduce((remaining, idx, iGroup) => {
+            if (remaining >= groupBeatsDurations[iGroup]) {
+                return remaining - groupBeatsDurations[iGroup];
+            }
+            mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
+            return 0;
+        }, audioDuration);
+    }
+    else if (audioDuration > beatsTotalDuration) {
+        // Last beat gets the rest of the audio.
+        groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+    }
+    beatDurations.push(...groupBeatsDurations);
+};
+const noSpilledOverAudio = (context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations) => {
+    // padding is the amount of audio padding specified in the script.
+    const padding = getPadding(context, beat, index);
+    // totalPadding is the amount of audio padding to be added to the audio file.
+    const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
+    const beatDuration = audioDuration + totalPadding;
+    beatDurations.push(beatDuration);
+    if (totalPadding > 0) {
+        mediaDurations[index].silenceDuration = totalPadding;
+    }
+};
 const combineAudioFilesAgent = async ({ namedInputs, }) => {
     const { context, combinedFileName } = namedInputs;
     const ffmpegContext = FfmpegContextInit();
@@ -77,91 +164,37 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         const { audioDuration, movieDuration } = mediaDurations[index];
         // Check if we are processing a voice-over beat.
         if (movieDuration > 0) {
-            const group = …
-            for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
-                group.push(i);
-            }
+            const group = getVoiceOverGroup(context, index);
             if (group.length > 1) {
-                [… 2 lines omitted in the source rendering]
-                userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
-                if (iGroup === group.length - 1) {
-                    beatDurations.push(remaining);
-                    subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
-                    return 0;
-                }
-                const nextBeat = context.studio.script.beats[idx + 1];
-                assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
-                const voiceStartAt = nextBeat.image?.startAt;
-                if (voiceStartAt) {
-                    const remainingDuration = movieDuration - voiceStartAt;
-                    const duration = remaining - remainingDuration;
-                    userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
-                    beatDurations.push(duration);
-                    subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
-                    userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
-                    return remainingDuration;
-                }
-                beatDurations.push(subBeatDurations.audioDuration);
-                return remaining - subBeatDurations.audioDuration;
-                }, movieDuration);
+                GraphAILogger.log(`Voice over group: ${group.length}`);
+                group.reduce(voiceOverProcess(context, mediaDurations, movieDuration, beatDurations, group.length), movieDuration);
                 return;
             }
         }
         // Check if the current beat has media and the next beat does not have media.
         if (audioDuration > 0) {
             // Check if the current beat has spilled over audio.
-            const group = …
-            for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
-                group.push(i);
-            }
+            const group = getSpillOverGroup(context, mediaDurations, index);
             if (group.length > 1) {
-                [… 3 lines omitted in the source rendering]
-                if (beatsTotalDuration > audioDuration + 0.01) {
-                    // 0.01 is a tolerance to avoid floating point precision issues
-                    group.reduce((remaining, idx, iGroup) => {
-                        if (remaining >= groupBeatsDurations[iGroup]) {
-                            return remaining - groupBeatsDurations[iGroup];
-                        }
-                        mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
-                        return 0;
-                    }, audioDuration);
-                }
-                else {
-                    // Last beat gets the rest of the audio.
-                    if (audioDuration > beatsTotalDuration) {
-                        groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
-                    }
-                }
-                beatDurations.push(...groupBeatsDurations);
-            }
-            else {
-                // No spilled over audio.
-                assert(beatDurations.length === index, "beatDurations.length !== index");
-                // padding is the amount of audio padding specified in the script.
-                const padding = getPadding(context, beat, index);
-                // totalPadding is the amount of audio padding to be added to the audio file.
-                const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
-                const beatDuration = audioDuration + totalPadding;
-                beatDurations.push(beatDuration);
-                if (totalPadding > 0) {
-                    mediaDurations[index].silenceDuration = totalPadding;
-                }
+                GraphAILogger.log(`Spill over group: ${group.length}`);
+                spilledOverAudio(context, group, audioDuration, beatDurations, mediaDurations);
+                return;
             }
+            // No spilled over audio.
+            assert(beatDurations.length === index, "beatDurations.length !== index");
+            noSpilledOverAudio(context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations);
+            return;
         }
-        […]
+        if (movieDuration > 0) {
             // This beat has only a movie, not audio.
             beatDurations.push(movieDuration);
             mediaDurations[index].silenceDuration = movieDuration;
+            return;
         }
-        [… 4 lines omitted in the source rendering]
-        mediaDurations[index].silenceDuration = beatDuration;
-        }
+        // The current beat has no audio, nor no spilled over audio
+        const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
+        beatDurations.push(beatDuration);
+        mediaDurations[index].silenceDuration = beatDuration;
     });
     assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
     // We cannot reuse longSilentId. We need to explicitly split it for each beat.
@@ -170,20 +203,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
         ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
     }
-    const inputIds = …
-    context.studio.beats.forEach((studioBeat, index) => {
-        const { silenceDuration } = mediaDurations[index];
-        const paddingId = `[padding_${index}]`;
-        if (studioBeat.audioFile) {
-            const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
-            inputIds.push(audioId);
-        }
-        if (silenceDuration > 0) {
-            const silentId = silentIds.pop();
-            ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
-            inputIds.push(paddingId);
-        }
-    });
+    const inputIds = getInputIds(context, mediaDurations, ffmpegContext, silentIds);
     assert(silentIds.length === 0, "silentIds.length !== 0");
     GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
     // Finally, combine all audio files.
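To make the spill-over arithmetic concrete, here is a standalone TypeScript re-run of the inner reduce from spilledOverAudio with made-up numbers: 10 seconds of audio spill across a group whose nominal beat durations are 4 + 4 + 4 seconds (12 > 10.01, so the first branch applies), leaving a 2-second shortfall that becomes silence on the last beat:

    // Standalone illustration of the distribution logic; not the package's code.
    const groupBeatsDurations = [4, 4, 4]; // hypothetical per-beat durations (seconds)
    const audioDuration = 10;              // hypothetical spilled-over audio (seconds)
    const silenceByBeat: Record<number, number> = {};
    groupBeatsDurations.reduce((remaining, duration, iGroup) => {
        if (remaining >= duration) {
            return remaining - duration; // this beat is fully covered by audio
        }
        silenceByBeat[iGroup] = duration - remaining; // pad the shortfall with silence
        return 0;
    }, audioDuration);
    console.log(silenceByBeat); // { "2": 2 } -> 2s of silence on the last beat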
package/lib/agents/movie_replicate_agent.js CHANGED
@@ -1,6 +1,7 @@
 import { readFileSync } from "fs";
 import { GraphAILogger } from "graphai";
 import Replicate from "replicate";
+import { provider2MovieAgent } from "../utils/provider2agent.js";
 async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
     const replicate = new Replicate({
         auth: apiKey,
@@ -21,7 +22,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
     if (imagePath) {
         const buffer = readFileSync(imagePath);
         const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
-        if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
+        if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro" || model === "minimax/hailuo-02") {
            input.start_image = base64Image;
        }
        else {
@@ -29,7 +30,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
        }
    }
    try {
-        const output = await replicate.run(model ?? …
+        const output = await replicate.run(model ?? provider2MovieAgent.replicate.defaultModel, { input });
        // Download the generated video
        if (output && typeof output === "object" && "url" in output) {
            const videoUrl = output.url();
@@ -62,7 +63,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
    const { prompt, imagePath } = namedInputs;
    const aspectRatio = getAspectRatio(params.canvasSize);
    const duration = params.duration ?? 5;
-    const apiKey = config?.apiKey …
+    const apiKey = config?.apiKey;
    if (!apiKey) {
        throw new Error("REPLICATE_API_TOKEN environment variable is required");
    }
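Pulling the movieReplicateAgent changes together, a hypothetical invocation of the agent defined in this file; the argument shape follows the destructuring visible in the diff (namedInputs, params, config), everything else is assumed:

    // Hypothetical call; prompt, paths and sizes are made up for illustration.
    const result = await movieReplicateAgent({
        namedInputs: { prompt: "a sunrise over Tokyo", imagePath: "./start.png" },
        params: {
            model: "minimax/hailuo-02", // now receives start_image, per the new branch
            duration: 5,                // the default when omitted
            canvasSize: { width: 1024, height: 1536 },
        },
        config: { apiKey: process.env.REPLICATE_API_TOKEN }, // required, or the agent throws
    });
    console.log(result);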
package/lib/agents/tts_elevenlabs_agent.js CHANGED
@@ -3,7 +3,7 @@ import { provider2TTSAgent } from "../utils/provider2agent.js";
 export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
     const { text } = namedInputs;
     const { voice, model, stability, similarityBoost, suppressError } = params;
-    const apiKey = config?.apiKey …
+    const apiKey = config?.apiKey;
     if (!apiKey) {
         throw new Error("ELEVENLABS_API_KEY environment variable is required");
     }
package/lib/agents/tts_nijivoice_agent.js CHANGED
@@ -1,5 +1,4 @@
 import { GraphAILogger, assert } from "graphai";
-const nijovoiceApiKey = process.env.NIJIVOICE_API_KEY ?? "";
 const errorMessage = [
     "TTS NijiVoice: No API key. ",
     "You have the following options:",
@@ -10,12 +9,12 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
     const { suppressError, voice, speed, speed_global } = params;
     const { apiKey } = config ?? {};
     const { text } = namedInputs;
-    assert(!! …
+    assert(!!apiKey, errorMessage);
     const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
     const options = {
         method: "POST",
         headers: {
-            "x-api-key": apiKey …
+            "x-api-key": apiKey,
             accept: "application/json",
             "content-type": "application/json",
         },
package/lib/cli/commands/tool/scripting/builder.js CHANGED
@@ -1,4 +1,4 @@
-import { llm } from "../../../../utils/ …
+import { llm } from "../../../../utils/provider2agent.js";
 import { getAvailableTemplates } from "../../../../utils/file.js";
 const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
 export const builder = (yargs) => {