mulmocast 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +257 -39
- package/assets/audio/silent60sec.mp3 +0 -0
- package/assets/html/caption.html +45 -0
- package/assets/html/chart.html +1 -1
- package/assets/html/mermaid.html +6 -2
- package/assets/html/tailwind.html +13 -0
- package/assets/templates/business.json +57 -4
- package/assets/templates/comic_strips.json +35 -0
- package/assets/templates/ghibli_strips.json +35 -0
- package/lib/actions/audio.js +24 -11
- package/lib/actions/captions.d.ts +2 -0
- package/lib/actions/captions.js +62 -0
- package/lib/actions/images.js +3 -2
- package/lib/actions/index.d.ts +1 -0
- package/lib/actions/index.js +1 -0
- package/lib/actions/movie.js +78 -86
- package/lib/actions/pdf.js +15 -5
- package/lib/actions/translate.js +32 -26
- package/lib/agents/add_bgm_agent.js +15 -39
- package/lib/agents/combine_audio_files_agent.js +43 -36
- package/lib/agents/index.d.ts +2 -3
- package/lib/agents/index.js +2 -3
- package/lib/agents/tts_google_agent.d.ts +4 -0
- package/lib/agents/tts_google_agent.js +51 -0
- package/lib/agents/validate_schema_agent.d.ts +19 -0
- package/lib/agents/validate_schema_agent.js +36 -0
- package/lib/cli/args.d.ts +2 -0
- package/lib/cli/args.js +9 -2
- package/lib/cli/bin.d.ts +3 -0
- package/lib/cli/bin.js +38 -0
- package/lib/cli/cli.js +34 -7
- package/lib/cli/commands/audio/builder.d.ts +14 -0
- package/lib/cli/commands/audio/builder.js +6 -0
- package/lib/cli/commands/audio/handler.d.ts +4 -0
- package/lib/cli/commands/audio/handler.js +7 -0
- package/lib/cli/commands/audio/index.d.ts +4 -0
- package/lib/cli/commands/audio/index.js +4 -0
- package/lib/cli/commands/image/builder.d.ts +14 -0
- package/lib/cli/commands/image/builder.js +6 -0
- package/lib/cli/commands/image/handler.d.ts +4 -0
- package/lib/cli/commands/image/handler.js +7 -0
- package/lib/cli/commands/image/index.d.ts +4 -0
- package/lib/cli/commands/image/index.js +4 -0
- package/lib/cli/commands/movie/builder.d.ts +18 -0
- package/lib/cli/commands/movie/builder.js +19 -0
- package/lib/cli/commands/movie/handler.d.ts +6 -0
- package/lib/cli/commands/movie/handler.js +12 -0
- package/lib/cli/commands/movie/index.d.ts +4 -0
- package/lib/cli/commands/movie/index.js +4 -0
- package/lib/cli/commands/pdf/builder.d.ts +18 -0
- package/lib/cli/commands/pdf/builder.js +19 -0
- package/lib/cli/commands/pdf/handler.d.ts +6 -0
- package/lib/cli/commands/pdf/handler.js +8 -0
- package/lib/cli/commands/pdf/index.d.ts +4 -0
- package/lib/cli/commands/pdf/index.js +4 -0
- package/lib/cli/commands/tool/index.d.ts +6 -0
- package/lib/cli/commands/tool/index.js +8 -0
- package/lib/cli/commands/tool/prompt/builder.d.ts +4 -0
- package/lib/cli/commands/tool/prompt/builder.js +11 -0
- package/lib/cli/commands/tool/prompt/handler.d.ts +4 -0
- package/lib/cli/commands/tool/prompt/handler.js +14 -0
- package/lib/cli/commands/tool/prompt/index.d.ts +4 -0
- package/lib/cli/commands/tool/prompt/index.js +4 -0
- package/lib/cli/commands/tool/schema/builder.d.ts +2 -0
- package/lib/cli/commands/tool/schema/builder.js +3 -0
- package/lib/cli/commands/tool/schema/handler.d.ts +2 -0
- package/lib/cli/commands/tool/schema/handler.js +12 -0
- package/lib/cli/commands/tool/schema/index.d.ts +4 -0
- package/lib/cli/commands/tool/schema/index.js +4 -0
- package/lib/cli/commands/tool/scripting/builder.d.ts +20 -0
- package/lib/cli/commands/tool/scripting/builder.js +63 -0
- package/lib/cli/commands/tool/scripting/handler.d.ts +12 -0
- package/lib/cli/commands/tool/scripting/handler.js +36 -0
- package/lib/cli/commands/tool/scripting/index.d.ts +4 -0
- package/lib/cli/commands/tool/scripting/index.js +4 -0
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +18 -0
- package/lib/cli/commands/tool/story_to_script/builder.js +53 -0
- package/lib/cli/commands/tool/story_to_script/handler.d.ts +11 -0
- package/lib/cli/commands/tool/story_to_script/handler.js +35 -0
- package/lib/cli/commands/tool/story_to_script/index.d.ts +4 -0
- package/lib/cli/commands/tool/story_to_script/index.js +4 -0
- package/lib/cli/commands/translate/builder.d.ts +14 -0
- package/lib/cli/commands/translate/builder.js +5 -0
- package/lib/cli/commands/translate/handler.d.ts +4 -0
- package/lib/cli/commands/translate/handler.js +6 -0
- package/lib/cli/commands/translate/index.d.ts +4 -0
- package/lib/cli/commands/translate/index.js +4 -0
- package/lib/cli/common.d.ts +6 -2
- package/lib/cli/common.js +18 -7
- package/lib/cli/helpers.d.ts +38 -0
- package/lib/cli/helpers.js +115 -0
- package/lib/cli/tool-args.d.ts +1 -0
- package/lib/cli/tool-args.js +1 -1
- package/lib/cli/tool-cli.js +8 -0
- package/lib/methods/mulmo_script.d.ts +0 -1
- package/lib/methods/mulmo_script.js +4 -7
- package/lib/methods/mulmo_script_template.js +2 -12
- package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
- package/lib/tools/create_mulmo_script_from_url.js +43 -14
- package/lib/tools/create_mulmo_script_interactively.js +14 -13
- package/lib/tools/dump_prompt.js +2 -0
- package/lib/tools/story_to_script.d.ts +10 -0
- package/lib/tools/story_to_script.js +201 -0
- package/lib/types/cli_types.d.ts +14 -0
- package/lib/types/cli_types.js +1 -0
- package/lib/types/schema.d.ts +493 -176
- package/lib/types/schema.js +37 -7
- package/lib/types/type.d.ts +6 -1
- package/lib/utils/const.d.ts +1 -0
- package/lib/utils/const.js +1 -0
- package/lib/utils/ffmpeg_utils.d.ts +12 -0
- package/lib/utils/ffmpeg_utils.js +63 -0
- package/lib/utils/file.d.ts +7 -3
- package/lib/utils/file.js +24 -5
- package/lib/utils/image_plugins/chart.js +6 -1
- package/lib/utils/image_plugins/html_tailwind.d.ts +3 -0
- package/lib/utils/image_plugins/html_tailwind.js +18 -0
- package/lib/utils/image_plugins/index.d.ts +2 -1
- package/lib/utils/image_plugins/index.js +2 -1
- package/lib/utils/image_plugins/mermaid.js +1 -1
- package/lib/utils/image_plugins/tailwind.d.ts +3 -0
- package/lib/utils/image_plugins/tailwind.js +18 -0
- package/lib/utils/image_plugins/text_slide.js +9 -2
- package/lib/utils/markdown.d.ts +1 -1
- package/lib/utils/markdown.js +8 -2
- package/lib/utils/preprocess.d.ts +23 -12
- package/lib/utils/preprocess.js +4 -0
- package/lib/utils/prompt.d.ts +15 -0
- package/lib/utils/prompt.js +57 -0
- package/lib/utils/utils.d.ts +2 -0
- package/lib/utils/utils.js +10 -0
- package/package.json +27 -23
package/lib/actions/audio.js
CHANGED
|
@@ -5,17 +5,19 @@ import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
|
5
5
|
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
6
6
|
import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
|
|
7
7
|
import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
|
|
8
|
+
import ttsGoogleAgent from "../agents/tts_google_agent.js";
|
|
8
9
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
10
|
import { MulmoScriptMethods } from "../methods/index.js";
|
|
10
11
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
11
12
|
import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
|
|
12
|
-
import { text2hash } from "../utils/utils.js";
|
|
13
|
+
import { text2hash, localizedText } from "../utils/utils.js";
|
|
13
14
|
const { default: __, ...vanillaAgents } = agents;
|
|
14
15
|
// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
|
|
15
16
|
// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
|
|
16
17
|
const provider_to_agent = {
|
|
17
18
|
nijivoice: "ttsNijivoiceAgent",
|
|
18
19
|
openai: "ttsOpenaiAgent",
|
|
20
|
+
google: "ttsGoogleAgent",
|
|
19
21
|
};
|
|
20
22
|
const getAudioPath = (context, beat, audioFile, audioDirPath) => {
|
|
21
23
|
if (beat.audio?.type === "audio") {
|
|
@@ -25,23 +27,30 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
|
|
|
25
27
|
}
|
|
26
28
|
throw new Error("Invalid audio source");
|
|
27
29
|
}
|
|
30
|
+
if (beat.text === "") {
|
|
31
|
+
return undefined; // It indicates that the audio is not needed.
|
|
32
|
+
}
|
|
28
33
|
return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
|
|
29
34
|
};
|
|
30
35
|
const preprocessor = (namedInputs) => {
|
|
31
|
-
const { beat, index, context, audioDirPath } = namedInputs;
|
|
32
|
-
const
|
|
36
|
+
const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
|
|
37
|
+
const { lang } = context;
|
|
33
38
|
const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
|
|
34
39
|
const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
|
|
35
|
-
const
|
|
36
|
-
const
|
|
40
|
+
const text = localizedText(beat, multiLingual, lang);
|
|
41
|
+
const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
|
|
42
|
+
const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
|
|
37
43
|
const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
|
|
38
44
|
studioBeat.audioFile = audioPath;
|
|
45
|
+
const needsTTS = !beat.audio && audioPath !== undefined;
|
|
39
46
|
return {
|
|
40
47
|
ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
|
|
41
48
|
studioBeat,
|
|
42
49
|
voiceId,
|
|
43
50
|
speechOptions,
|
|
44
51
|
audioPath,
|
|
52
|
+
text,
|
|
53
|
+
needsTTS,
|
|
45
54
|
};
|
|
46
55
|
};
|
|
47
56
|
const graph_tts = {
|
|
@@ -50,16 +59,18 @@ const graph_tts = {
|
|
|
50
59
|
agent: preprocessor,
|
|
51
60
|
inputs: {
|
|
52
61
|
beat: ":beat",
|
|
62
|
+
studioBeat: ":studioBeat",
|
|
63
|
+
multiLingual: ":multiLingual",
|
|
53
64
|
index: ":__mapIndex",
|
|
54
65
|
context: ":context",
|
|
55
66
|
audioDirPath: ":audioDirPath",
|
|
56
67
|
},
|
|
57
68
|
},
|
|
58
69
|
tts: {
|
|
59
|
-
|
|
70
|
+
if: ":preprocessor.needsTTS",
|
|
60
71
|
agent: ":preprocessor.ttsAgent",
|
|
61
72
|
inputs: {
|
|
62
|
-
text: ":
|
|
73
|
+
text: ":preprocessor.text",
|
|
63
74
|
file: ":preprocessor.audioPath",
|
|
64
75
|
force: ":context.force",
|
|
65
76
|
},
|
|
@@ -85,13 +96,15 @@ const graph_data = {
|
|
|
85
96
|
agent: "mapAgent",
|
|
86
97
|
inputs: {
|
|
87
98
|
rows: ":context.studio.script.beats",
|
|
88
|
-
|
|
99
|
+
studioBeat: ":context.studio.beats",
|
|
100
|
+
multiLingual: ":context.studio.multiLingual",
|
|
89
101
|
audioDirPath: ":audioDirPath",
|
|
90
102
|
audioSegmentDirPath: ":audioSegmentDirPath",
|
|
91
103
|
context: ":context",
|
|
92
104
|
},
|
|
93
105
|
params: {
|
|
94
106
|
rowKey: "beat",
|
|
107
|
+
expandKeys: ["studioBeat", "multiLingual"],
|
|
95
108
|
},
|
|
96
109
|
graph: graph_tts,
|
|
97
110
|
},
|
|
@@ -101,7 +114,6 @@ const graph_data = {
|
|
|
101
114
|
map: ":map",
|
|
102
115
|
context: ":context",
|
|
103
116
|
combinedFileName: ":audioCombinedFilePath",
|
|
104
|
-
audioDirPath: ":audioDirPath",
|
|
105
117
|
},
|
|
106
118
|
isResult: true,
|
|
107
119
|
},
|
|
@@ -145,11 +157,11 @@ const agentFilters = [
|
|
|
145
157
|
},
|
|
146
158
|
];
|
|
147
159
|
export const audio = async (context) => {
|
|
148
|
-
const { studio, fileDirs } = context;
|
|
160
|
+
const { studio, fileDirs, lang } = context;
|
|
149
161
|
const { outDirPath, audioDirPath } = fileDirs;
|
|
150
162
|
const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
|
|
151
163
|
const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
|
|
152
|
-
const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename);
|
|
164
|
+
const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
|
|
153
165
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
|
|
154
166
|
mkdir(outDirPath);
|
|
155
167
|
mkdir(audioSegmentDirPath);
|
|
@@ -159,6 +171,7 @@ export const audio = async (context) => {
|
|
|
159
171
|
fileWriteAgent,
|
|
160
172
|
ttsOpenaiAgent,
|
|
161
173
|
ttsNijivoiceAgent,
|
|
174
|
+
ttsGoogleAgent,
|
|
162
175
|
addBGMAgent,
|
|
163
176
|
combineAudioFilesAgent,
|
|
164
177
|
}, { agentFilters });
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
|
+
import * as agents from "@graphai/vanilla";
|
|
3
|
+
import { getHTMLFile } from "../utils/file.js";
|
|
4
|
+
import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
|
|
5
|
+
const { default: __, ...vanillaAgents } = agents;
|
|
6
|
+
const graph_data = {
|
|
7
|
+
version: 0.5,
|
|
8
|
+
nodes: {
|
|
9
|
+
context: {},
|
|
10
|
+
map: {
|
|
11
|
+
agent: "mapAgent",
|
|
12
|
+
inputs: { rows: ":context.studio.script.beats", context: ":context" },
|
|
13
|
+
isResult: true,
|
|
14
|
+
params: {
|
|
15
|
+
rowKey: "beat",
|
|
16
|
+
compositeResult: true,
|
|
17
|
+
},
|
|
18
|
+
graph: {
|
|
19
|
+
nodes: {
|
|
20
|
+
test: {
|
|
21
|
+
agent: async (namedInputs) => {
|
|
22
|
+
const { beat, context, index } = namedInputs;
|
|
23
|
+
const { fileDirs } = namedInputs.context;
|
|
24
|
+
const { caption } = context;
|
|
25
|
+
const { imageDirPath } = fileDirs;
|
|
26
|
+
const { canvasSize } = context.studio.script;
|
|
27
|
+
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
|
|
28
|
+
const template = getHTMLFile("caption");
|
|
29
|
+
const text = (() => {
|
|
30
|
+
const multiLingual = context.studio.multiLingual;
|
|
31
|
+
if (caption && multiLingual) {
|
|
32
|
+
return multiLingual[index].multiLingualTexts[caption].text;
|
|
33
|
+
}
|
|
34
|
+
GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
|
|
35
|
+
return beat.text;
|
|
36
|
+
})();
|
|
37
|
+
const htmlData = interpolate(template, {
|
|
38
|
+
caption: text,
|
|
39
|
+
width: `${canvasSize.width}`,
|
|
40
|
+
height: `${canvasSize.height}`,
|
|
41
|
+
});
|
|
42
|
+
await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
|
|
43
|
+
context.studio.beats[index].captionFile = imagePath;
|
|
44
|
+
return imagePath;
|
|
45
|
+
},
|
|
46
|
+
inputs: {
|
|
47
|
+
beat: ":beat",
|
|
48
|
+
context: ":context",
|
|
49
|
+
index: ":__mapIndex",
|
|
50
|
+
},
|
|
51
|
+
isResult: true,
|
|
52
|
+
},
|
|
53
|
+
},
|
|
54
|
+
},
|
|
55
|
+
},
|
|
56
|
+
},
|
|
57
|
+
};
|
|
58
|
+
export const captions = async (context) => {
|
|
59
|
+
const graph = new GraphAI(graph_data, { ...vanillaAgents });
|
|
60
|
+
graph.injectValue("context", context);
|
|
61
|
+
await graph.run();
|
|
62
|
+
};
|
package/lib/actions/images.js
CHANGED
|
@@ -8,6 +8,7 @@ import imageGoogleAgent from "../agents/image_google_agent.js";
|
|
|
8
8
|
import imageOpenaiAgent from "../agents/image_openai_agent.js";
|
|
9
9
|
import { MulmoScriptMethods } from "../methods/index.js";
|
|
10
10
|
import { imagePlugins } from "../utils/image_plugins/index.js";
|
|
11
|
+
import { imagePrompt } from "../utils/prompt.js";
|
|
11
12
|
const { default: __, ...vanillaAgents } = agents;
|
|
12
13
|
dotenv.config();
|
|
13
14
|
// const openai = new OpenAI();
|
|
@@ -35,12 +36,12 @@ const imagePreprocessAgent = async (namedInputs) => {
|
|
|
35
36
|
return { path, ...returnValue };
|
|
36
37
|
}
|
|
37
38
|
}
|
|
38
|
-
const prompt = (beat
|
|
39
|
+
const prompt = imagePrompt(beat, imageParams.style);
|
|
39
40
|
return { path: imagePath, prompt, ...returnValue };
|
|
40
41
|
};
|
|
41
42
|
const graph_data = {
|
|
42
43
|
version: 0.5,
|
|
43
|
-
concurrency:
|
|
44
|
+
concurrency: 4,
|
|
44
45
|
nodes: {
|
|
45
46
|
context: {},
|
|
46
47
|
imageDirPath: {},
|
package/lib/actions/index.d.ts
CHANGED
package/lib/actions/index.js
CHANGED
package/lib/actions/movie.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import ffmpeg from "fluent-ffmpeg";
|
|
2
1
|
import { GraphAILogger } from "graphai";
|
|
3
2
|
import { MulmoScriptMethods } from "../methods/index.js";
|
|
4
3
|
import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
|
|
5
|
-
|
|
6
|
-
const
|
|
4
|
+
import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
|
|
5
|
+
// const isMac = process.platform === "darwin";
|
|
6
|
+
const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
|
|
7
7
|
export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
|
|
8
8
|
const videoId = `v${inputIndex}`;
|
|
9
9
|
return {
|
|
@@ -14,7 +14,9 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
|
|
|
14
14
|
`trim=duration=${duration}`,
|
|
15
15
|
"fps=30",
|
|
16
16
|
"setpts=PTS-STARTPTS",
|
|
17
|
-
`scale=${canvasInfo.width}
|
|
17
|
+
`scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
|
|
18
|
+
// In case of the aspect ratio mismatch, we fill the extra space with black color.
|
|
19
|
+
`pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
|
|
18
20
|
"setsar=1",
|
|
19
21
|
"format=yuv420p",
|
|
20
22
|
]
|
|
@@ -29,112 +31,102 @@ export const getAudioPart = (inputIndex, duration, delay) => {
|
|
|
29
31
|
audioId,
|
|
30
32
|
audioPart: `[${inputIndex}:a]` +
|
|
31
33
|
`atrim=duration=${duration},` + // Trim to beat duration
|
|
32
|
-
`adelay=${delay}|${delay},` +
|
|
34
|
+
`adelay=${delay * 1000}|${delay * 1000},` +
|
|
33
35
|
`aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
|
|
34
36
|
`[${audioId}]`,
|
|
35
37
|
};
|
|
36
38
|
};
|
|
37
39
|
const getOutputOption = (audioId) => {
|
|
38
40
|
return [
|
|
39
|
-
"-preset
|
|
41
|
+
"-preset medium", // Changed from veryfast to medium for better compression
|
|
40
42
|
"-map [v]", // Map the video stream
|
|
41
43
|
`-map ${audioId}`, // Map the audio stream
|
|
42
44
|
`-c:v ${videoCodec}`, // Set video codec
|
|
45
|
+
...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
|
|
43
46
|
"-threads 8",
|
|
44
47
|
"-filter_threads 8",
|
|
45
|
-
"-b:v
|
|
48
|
+
"-b:v 2M", // Reduced from 5M to 2M
|
|
46
49
|
"-bufsize",
|
|
47
|
-
"
|
|
50
|
+
"4M", // Reduced buffer size
|
|
48
51
|
"-maxrate",
|
|
49
|
-
"
|
|
52
|
+
"3M", // Reduced from 7M to 3M
|
|
50
53
|
"-r 30", // Set frame rate
|
|
51
54
|
"-pix_fmt yuv420p", // Set pixel format for better compatibility
|
|
55
|
+
"-c:a aac", // Audio codec
|
|
56
|
+
"-b:a 128k", // Audio bitrate
|
|
52
57
|
];
|
|
53
58
|
};
|
|
54
|
-
const createVideo = (audioArtifactFilePath, outputVideoPath, studio) => {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
59
|
+
const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
|
|
60
|
+
const start = performance.now();
|
|
61
|
+
const ffmpegContext = FfmpegContextInit();
|
|
62
|
+
if (studio.beats.some((beat) => !beat.imageFile)) {
|
|
63
|
+
GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
|
|
67
|
+
// Add each image input
|
|
68
|
+
const filterComplexVideoIds = [];
|
|
69
|
+
const filterComplexAudioIds = [];
|
|
70
|
+
studio.beats.reduce((timestamp, beat, index) => {
|
|
71
|
+
if (!beat.imageFile || !beat.duration) {
|
|
72
|
+
throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
|
|
65
73
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
// Add each image input
|
|
73
|
-
const filterComplexParts = [];
|
|
74
|
-
const filterComplexVideoIds = [];
|
|
75
|
-
const filterComplexAudioIds = [];
|
|
76
|
-
studio.beats.reduce((timestamp, beat, index) => {
|
|
77
|
-
if (!beat.imageFile || !beat.duration) {
|
|
78
|
-
throw new Error(`beat.imageFile is not set: index=${index}`);
|
|
79
|
-
}
|
|
80
|
-
const inputIndex = addInput(beat.imageFile);
|
|
81
|
-
const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
|
|
82
|
-
const headOrTail = index === 0 || index === studio.beats.length - 1;
|
|
83
|
-
const duration = beat.duration + (headOrTail ? padding : 0);
|
|
84
|
-
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
|
|
85
|
-
filterComplexVideoIds.push(videoId);
|
|
86
|
-
filterComplexParts.push(videoPart);
|
|
87
|
-
if (mediaType === "movie") {
|
|
88
|
-
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp * 1000);
|
|
89
|
-
filterComplexAudioIds.push(audioId);
|
|
90
|
-
filterComplexParts.push(audioPart);
|
|
74
|
+
const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
|
|
75
|
+
const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
|
|
76
|
+
const extraPadding = (() => {
|
|
77
|
+
// We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
|
|
78
|
+
if (index === 0) {
|
|
79
|
+
return studio.script.audioParams.introPadding;
|
|
91
80
|
}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
// console.log("*** images", images.audioIds);
|
|
95
|
-
// Concatenate the trimmed images
|
|
96
|
-
filterComplexParts.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
|
|
97
|
-
const audioIndex = addInput(audioArtifactFilePath); // Add audio input
|
|
98
|
-
const artifactAudioId = `${audioIndex}:a`;
|
|
99
|
-
const ffmpegContextAudioId = (() => {
|
|
100
|
-
if (filterComplexAudioIds.length > 0) {
|
|
101
|
-
const mainAudioId = "mainaudio";
|
|
102
|
-
const compositeAudioId = "composite";
|
|
103
|
-
const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
|
|
104
|
-
filterComplexParts.push(`[${artifactAudioId}]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[${mainAudioId}]`);
|
|
105
|
-
filterComplexParts.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
|
|
106
|
-
return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
|
|
81
|
+
else if (index === studio.beats.length - 1) {
|
|
82
|
+
return studio.script.audioParams.outroPadding;
|
|
107
83
|
}
|
|
108
|
-
return
|
|
84
|
+
return 0;
|
|
109
85
|
})();
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
.
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
});
|
|
86
|
+
const duration = beat.duration + extraPadding;
|
|
87
|
+
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
|
|
88
|
+
ffmpegContext.filterComplex.push(videoPart);
|
|
89
|
+
if (caption && beat.captionFile) {
|
|
90
|
+
const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
|
|
91
|
+
const compositeVideoId = `c${index}`;
|
|
92
|
+
ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
|
|
93
|
+
filterComplexVideoIds.push(compositeVideoId);
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
filterComplexVideoIds.push(videoId);
|
|
97
|
+
}
|
|
98
|
+
if (mediaType === "movie") {
|
|
99
|
+
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
|
|
100
|
+
filterComplexAudioIds.push(audioId);
|
|
101
|
+
ffmpegContext.filterComplex.push(audioPart);
|
|
102
|
+
}
|
|
103
|
+
return timestamp + duration;
|
|
104
|
+
}, 0);
|
|
105
|
+
// console.log("*** images", images.audioIds);
|
|
106
|
+
// Concatenate the trimmed images
|
|
107
|
+
ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
|
|
108
|
+
const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
|
|
109
|
+
const artifactAudioId = `${audioIndex}:a`;
|
|
110
|
+
const ffmpegContextAudioId = (() => {
|
|
111
|
+
if (filterComplexAudioIds.length > 0) {
|
|
112
|
+
const mainAudioId = "mainaudio";
|
|
113
|
+
const compositeAudioId = "composite";
|
|
114
|
+
const audioIds = filterComplexAudioIds.map((id) => `[${id}]`).join("");
|
|
115
|
+
FfmpegContextPushFormattedAudio(ffmpegContext, `[${artifactAudioId}]`, `[${mainAudioId}]`);
|
|
116
|
+
ffmpegContext.filterComplex.push(`[${mainAudioId}]${audioIds}amix=inputs=${filterComplexAudioIds.length + 1}:duration=first:dropout_transition=2[${compositeAudioId}]`);
|
|
117
|
+
return `[${compositeAudioId}]`; // notice that we need to use [mainaudio] instead of mainaudio
|
|
118
|
+
}
|
|
119
|
+
return artifactAudioId;
|
|
120
|
+
})();
|
|
121
|
+
await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
|
|
122
|
+
const end = performance.now();
|
|
123
|
+
GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
|
|
132
124
|
};
|
|
133
125
|
export const movie = async (context) => {
|
|
134
|
-
const { studio, fileDirs } = context;
|
|
126
|
+
const { studio, fileDirs, caption } = context;
|
|
135
127
|
const { outDirPath } = fileDirs;
|
|
136
128
|
const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
|
|
137
|
-
const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename);
|
|
138
|
-
await createVideo(audioArtifactFilePath, outputVideoPath, studio);
|
|
129
|
+
const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
|
|
130
|
+
await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
|
|
139
131
|
writingMessage(outputVideoPath);
|
|
140
132
|
};
|
package/lib/actions/pdf.js
CHANGED
|
@@ -2,7 +2,7 @@ import fs from "fs";
|
|
|
2
2
|
import path from "path";
|
|
3
3
|
import { rgb, PDFDocument } from "pdf-lib";
|
|
4
4
|
import fontkit from "@pdf-lib/fontkit";
|
|
5
|
-
import { chunkArray, isHttp } from "../utils/utils.js";
|
|
5
|
+
import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
|
|
6
6
|
import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
|
|
7
7
|
import { MulmoScriptMethods } from "../methods/index.js";
|
|
8
8
|
import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
|
|
@@ -19,7 +19,14 @@ const readImage = async (imagePath, pdfDoc) => {
|
|
|
19
19
|
return fs.readFileSync(imagePath);
|
|
20
20
|
})();
|
|
21
21
|
const ext = path.extname(imagePath).toLowerCase();
|
|
22
|
-
|
|
22
|
+
if (ext === ".jpg" || ext === ".jpeg") {
|
|
23
|
+
return await pdfDoc.embedJpg(imageBytes);
|
|
24
|
+
}
|
|
25
|
+
if (ext === ".png") {
|
|
26
|
+
return await pdfDoc.embedPng(imageBytes);
|
|
27
|
+
}
|
|
28
|
+
// workaround. TODO: movie, image should convert to png/jpeg image
|
|
29
|
+
return await pdfDoc.embedPng(fs.readFileSync("assets/images/mulmocast_credit.png"));
|
|
23
30
|
};
|
|
24
31
|
const pdfSlide = async (pageWidth, pageHeight, imagePaths, pdfDoc) => {
|
|
25
32
|
const cellRatio = pageHeight / pageWidth;
|
|
@@ -183,15 +190,18 @@ const outputSize = (pdfSize, isLandscapeImage, isRotate) => {
|
|
|
183
190
|
return { width: 612, height: 792 };
|
|
184
191
|
};
|
|
185
192
|
export const pdf = async (context, pdfMode, pdfSize) => {
|
|
186
|
-
const { studio, fileDirs } = context;
|
|
193
|
+
const { studio, fileDirs, lang } = context;
|
|
194
|
+
const { multiLingual } = studio;
|
|
187
195
|
const { outDirPath } = fileDirs;
|
|
188
196
|
const { width: imageWidth, height: imageHeight } = MulmoScriptMethods.getCanvasSize(studio.script);
|
|
189
197
|
const isLandscapeImage = imageWidth > imageHeight;
|
|
190
198
|
const isRotate = pdfMode === "handout";
|
|
191
199
|
const { width: pageWidth, height: pageHeight } = outputSize(pdfSize, isLandscapeImage, isRotate);
|
|
192
200
|
const imagePaths = studio.beats.map((beat) => beat.imageFile);
|
|
193
|
-
const texts = studio.script.beats.map((beat) =>
|
|
194
|
-
|
|
201
|
+
const texts = studio.script.beats.map((beat, index) => {
|
|
202
|
+
return localizedText(beat, multiLingual?.[index], lang);
|
|
203
|
+
});
|
|
204
|
+
const outputPdfPath = getOutputPdfFilePath(outDirPath, studio.filename, pdfMode, lang);
|
|
195
205
|
const pdfDoc = await PDFDocument.create();
|
|
196
206
|
pdfDoc.registerFontkit(fontkit);
|
|
197
207
|
const fontBytes = fs.readFileSync("assets/font/NotoSansJP-Regular.ttf");
|
package/lib/actions/translate.js
CHANGED
|
@@ -5,6 +5,7 @@ import { openAIAgent } from "@graphai/openai_agent";
|
|
|
5
5
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
6
6
|
import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
|
|
7
7
|
import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
|
|
8
|
+
import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
|
|
8
9
|
const { default: __, ...vanillaAgents } = agents;
|
|
9
10
|
const translateGraph = {
|
|
10
11
|
version: 0.5,
|
|
@@ -25,7 +26,7 @@ const translateGraph = {
|
|
|
25
26
|
isResult: true,
|
|
26
27
|
agent: "mergeObjectAgent",
|
|
27
28
|
inputs: {
|
|
28
|
-
items: [":studio", {
|
|
29
|
+
items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
|
|
29
30
|
},
|
|
30
31
|
},
|
|
31
32
|
beatsMap: {
|
|
@@ -43,20 +44,21 @@ const translateGraph = {
|
|
|
43
44
|
graph: {
|
|
44
45
|
version: 0.5,
|
|
45
46
|
nodes: {
|
|
46
|
-
|
|
47
|
+
// for cache
|
|
48
|
+
multiLingual: {
|
|
47
49
|
agent: (namedInputs) => {
|
|
48
|
-
return namedInputs.rows[namedInputs.index];
|
|
50
|
+
return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
|
|
49
51
|
},
|
|
50
52
|
inputs: {
|
|
51
53
|
index: ":__mapIndex",
|
|
52
|
-
rows: ":studio.
|
|
54
|
+
rows: ":studio.multiLingual",
|
|
53
55
|
},
|
|
54
56
|
},
|
|
55
|
-
|
|
57
|
+
preprocessMultiLingual: {
|
|
56
58
|
agent: "mapAgent",
|
|
57
59
|
inputs: {
|
|
58
60
|
beat: ":beat",
|
|
59
|
-
|
|
61
|
+
multiLingual: ":multiLingual",
|
|
60
62
|
rows: ":targetLangs",
|
|
61
63
|
lang: ":lang.text",
|
|
62
64
|
studio: ":studio",
|
|
@@ -70,12 +72,12 @@ const translateGraph = {
|
|
|
70
72
|
nodes: {
|
|
71
73
|
localizedTexts: {
|
|
72
74
|
inputs: {
|
|
73
|
-
targetLang: ":targetLang",
|
|
74
|
-
beat: ":beat",
|
|
75
|
-
|
|
76
|
-
lang: ":lang",
|
|
77
|
-
system:
|
|
78
|
-
prompt:
|
|
75
|
+
targetLang: ":targetLang", // for cache
|
|
76
|
+
beat: ":beat", // for cache
|
|
77
|
+
multiLingual: ":multiLingual", // for cache
|
|
78
|
+
lang: ":lang", // for cache
|
|
79
|
+
system: translateSystemPrompt,
|
|
80
|
+
prompt: translatePrompts,
|
|
79
81
|
},
|
|
80
82
|
passThrough: {
|
|
81
83
|
lang: ":targetLang",
|
|
@@ -141,17 +143,17 @@ const translateGraph = {
|
|
|
141
143
|
mergeLocalizedText: {
|
|
142
144
|
agent: "arrayToObjectAgent",
|
|
143
145
|
inputs: {
|
|
144
|
-
items: ":
|
|
146
|
+
items: ":preprocessMultiLingual.ttsTexts",
|
|
145
147
|
},
|
|
146
148
|
params: {
|
|
147
149
|
key: "lang",
|
|
148
150
|
},
|
|
149
151
|
},
|
|
150
|
-
|
|
152
|
+
mergeMultiLingualData: {
|
|
151
153
|
isResult: true,
|
|
152
154
|
agent: "mergeObjectAgent",
|
|
153
155
|
inputs: {
|
|
154
|
-
items: [":
|
|
156
|
+
items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
|
|
155
157
|
},
|
|
156
158
|
},
|
|
157
159
|
},
|
|
@@ -169,14 +171,17 @@ const translateGraph = {
|
|
|
169
171
|
};
|
|
170
172
|
const localizedTextCacheAgentFilter = async (context, next) => {
|
|
171
173
|
const { namedInputs } = context;
|
|
172
|
-
const { targetLang, beat, lang,
|
|
174
|
+
const { targetLang, beat, lang, multiLingual } = namedInputs;
|
|
175
|
+
if (!beat.text) {
|
|
176
|
+
return { text: "" };
|
|
177
|
+
}
|
|
173
178
|
// The original text is unchanged and the target language text is present
|
|
174
|
-
if (
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
return { text:
|
|
179
|
+
if (multiLingual.multiLingualTexts &&
|
|
180
|
+
multiLingual.multiLingualTexts[lang] &&
|
|
181
|
+
multiLingual.multiLingualTexts[lang].text === beat.text &&
|
|
182
|
+
multiLingual.multiLingualTexts[targetLang] &&
|
|
183
|
+
multiLingual.multiLingualTexts[targetLang].text) {
|
|
184
|
+
return { text: multiLingual.multiLingualTexts[targetLang].text };
|
|
180
185
|
}
|
|
181
186
|
// same language
|
|
182
187
|
if (targetLang === lang) {
|
|
@@ -205,9 +210,10 @@ export const translate = async (context) => {
|
|
|
205
210
|
graph.injectValue("targetLangs", targetLangs);
|
|
206
211
|
graph.injectValue("outDirPath", outDirPath);
|
|
207
212
|
graph.injectValue("outputStudioFilePath", outputStudioFilePath);
|
|
208
|
-
await graph.run();
|
|
213
|
+
const results = await graph.run();
|
|
209
214
|
writingMessage(outputStudioFilePath);
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
215
|
+
if (results.mergeStudioResult) {
|
|
216
|
+
context.studio = results.mergeStudioResult;
|
|
217
|
+
}
|
|
218
|
+
// console.log(JSON.stringify(results, null, 2));
|
|
213
219
|
};
|