mulmocast 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -2
- package/assets/templates/business.json +2 -181
- package/assets/templates/children_book.json +1 -128
- package/assets/templates/coding.json +2 -136
- package/assets/templates/comic_strips.json +1 -30
- package/assets/templates/ghibli_strips.json +1 -30
- package/assets/templates/sensei_and_taro.json +1 -118
- package/lib/actions/audio.js +41 -31
- package/lib/actions/captions.js +39 -26
- package/lib/actions/images.js +31 -11
- package/lib/actions/movie.js +30 -21
- package/lib/actions/pdf.js +11 -1
- package/lib/actions/translate.js +33 -18
- package/lib/agents/combine_audio_files_agent.js +19 -8
- package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
- package/lib/cli/commands/tool/scripting/builder.js +4 -4
- package/lib/cli/commands/tool/scripting/handler.d.ts +2 -1
- package/lib/cli/commands/tool/scripting/handler.js +3 -3
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +3 -1
- package/lib/cli/commands/tool/story_to_script/builder.js +12 -4
- package/lib/cli/commands/tool/story_to_script/handler.d.ts +3 -1
- package/lib/cli/commands/tool/story_to_script/handler.js +4 -3
- package/lib/methods/mulmo_script_template.d.ts +2 -2
- package/lib/methods/mulmo_script_template.js +2 -2
- package/lib/methods/mulmo_studio.d.ts +8 -0
- package/lib/methods/mulmo_studio.js +24 -0
- package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
- package/lib/tools/create_mulmo_script_from_url.js +7 -7
- package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
- package/lib/tools/create_mulmo_script_interactively.js +8 -8
- package/lib/tools/story_to_script.d.ts +5 -3
- package/lib/tools/story_to_script.js +90 -16
- package/lib/types/schema.d.ts +320 -1766
- package/lib/types/schema.js +41 -2
- package/lib/types/type.d.ts +4 -2
- package/lib/utils/const.d.ts +4 -0
- package/lib/utils/const.js +4 -0
- package/lib/utils/file.d.ts +1 -0
- package/lib/utils/file.js +16 -4
- package/lib/utils/filters.js +16 -11
- package/lib/utils/markdown.js +0 -2
- package/lib/utils/preprocess.d.ts +34 -15
- package/lib/utils/preprocess.js +3 -2
- package/lib/utils/prompt.d.ts +2 -1
- package/lib/utils/prompt.js +20 -3
- package/lib/utils/utils.d.ts +8 -5
- package/lib/utils/utils.js +27 -17
- package/package.json +2 -2
package/assets/templates/sensei_and_taro.json
CHANGED

@@ -2,122 +2,5 @@
   "title": "Student and Teacher",
   "description": "Interactive discussion between a student and teacher",
   "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
-  "
-    "$mulmocast": {
-      "version": "1.0",
-      "credit": "closing"
-    },
-    "title": "韓国の戒厳令とその日本への影響",
-    "description": "韓国で最近発令された戒厳令とその可能性のある影響について、また日本の憲法に関する考慮事項との類似点を含めた洞察に満ちた議論。",
-    "lang": "ja",
-    "speechParams": {
-      "provider": "nijivoice",
-      "speakers": {
-        "Announcer": {
-          "displayName": {
-            "ja": "アナウンサー"
-          },
-          "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62"
-        },
-        "Student": {
-          "displayName": {
-            "ja": "生徒"
-          },
-          "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
-        },
-        "Teacher": {
-          "displayName": {
-            "ja": "先生"
-          },
-          "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
-        }
-      }
-    },
-    "beats": [
-      {
-        "speaker": "Announcer",
-        "text": "今日は、韓国で起きた戒厳令について、太郎くんが先生に聞きます。"
-      },
-      {
-        "speaker": "Student",
-        "text": "先生、今日は韓国で起きた戒厳令のことを教えてもらえますか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "もちろんだよ、太郎くん。韓国で最近、大統領が「戒厳令」っていうのを突然宣言したんだ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "戒厳令ってなんですか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "簡単に言うと、国がすごく危ない状態にあるとき、軍隊を使って人々の自由を制限するためのものなんだ。たとえば、政治活動を禁止したり、人の集まりを取り締まったりするんだよ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "それって怖いですね。なんでそんなことをしたんですか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "大統領は「国会がうまく機能していないから」と言っていたけど、実際には自分の立場を守るために使ったように見えるんだ。それで、軍隊が国会に突入して、議員たちを捕まえようとしたんだ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "ええっ!?国会議員を捕まえようとするなんて、すごく危ないことじゃないですか。"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "その通りだよ。もし軍隊が国会を占拠していたら、国会で戒厳令を解除することもできなかったかもしれない。つまり、大統領がずっと自分の好きなように国を支配できるようになってしまうんだ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "韓国ではどうなったんですか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "幸い、野党の議員や市民たちが急いで集まって抗議して、6時間後に戒厳令は解除されたんだ。でも、ほんの少しの違いで、韓国の民主主義が大きく傷つけられるところだったんだよ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "それは大変なことですね…。日本ではそんなこと起きないんですか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "実はね、今、日本でも似たような話があるんだよ。自民党が「緊急事態宣言」を憲法に追加しようとしているんだ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "緊急事態宣言って、韓国の戒厳令と同じようなものなんですか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "似ている部分があるね。たとえば、総理大臣が「社会秩序の混乱の危険があるから」と言えば、特別な権限を使って国を動かすことができるんだ。法律と同じ力を持つ命令を出したり、地方自治体に指示を出したりすることができるんだよ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "それって便利そうですけど、なんだか心配です。"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "そうだね。もちろん、緊急時には素早い対応が必要だから便利な面もあるけど、その権限が濫用されると、とても危険なんだ。たとえば、総理大臣が自分に都合のいいように国を動かしたり、国民の自由を奪ったりすることができるようになってしまうかもしれない。"
-      },
-      {
-        "speaker": "Student",
-        "text": "韓国みたいに、軍隊が政治に口を出してくることもあり得るんですか?"
-      },
-      {
-        "speaker": "Teacher",
-        "text": "完全にあり得ないとは言えないからこそ、注意が必要なんだ。私たち国民は、自民党の改憲案が権力の濫用を防ぐための適切な制限を含んでいるのかをしっかり監視し、声を上げることが求められる。民主主義が損なわれるのを防ぐために、私たち一人ひとりが積極的に関心を持つことが大切なんだよ。"
-      },
-      {
-        "speaker": "Student",
-        "text": "ありがとうございます。とても良い勉強になりました。"
-      },
-      {
-        "speaker": "Announcer",
-        "text": "ご視聴、ありがとうございました。次回の放送もお楽しみに。"
-      }
-    ]
-  }
+  "scriptName": "sensei_and_taro.json"
 }
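Judging by the deletion counts in the file list above, every bundled template apparently got the same treatment: the embedded sample script is replaced by a one-line scriptName reference. A minimal sketch of how such a reference could be resolved at load time; the directory constants and the loadTemplate helper are invented for illustration and are not the package's actual API:

import fs from "fs";
import path from "path";

// Hypothetical locations; the real lookup paths come from the package's file utilities.
const TEMPLATE_DIR = "assets/templates";
const SCRIPT_DIR = "assets/scripts";

// If a template carries only a "scriptName" reference (the new 0.0.7 shape),
// follow it to the standalone JSON file instead of expecting an embedded script.
const loadTemplate = (templateName) => {
  const template = JSON.parse(fs.readFileSync(path.join(TEMPLATE_DIR, templateName), "utf8"));
  if (template.scriptName) {
    template.script = JSON.parse(fs.readFileSync(path.join(SCRIPT_DIR, template.scriptName), "utf8"));
  }
  return template;
};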
package/lib/actions/audio.js
CHANGED

@@ -11,6 +11,7 @@ import { MulmoScriptMethods } from "../methods/index.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
 import { text2hash, localizedText } from "../utils/utils.js";
+import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const { default: __, ...vanillaAgents } = agents;
 // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
 // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -73,11 +74,14 @@ const graph_tts = {
                 text: ":preprocessor.text",
                 file: ":preprocessor.audioPath",
                 force: ":context.force",
-
-
-
-
-
+                studio: ":context.studio", // for cache
+                index: ":__mapIndex", // for cache
+                sessionType: "audio", // for cache
+                params: {
+                    voice: ":preprocessor.voiceId",
+                    speed: ":preprocessor.speechOptions.speed",
+                    instructions: ":preprocessor.speechOptions.instruction",
+                },
             },
         },
     },
@@ -157,30 +161,36 @@ const agentFilters = [
     },
 ];
 export const audio = async (context) => {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try {
+        MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+        const { studio, fileDirs, lang } = context;
+        const { outDirPath, audioDirPath } = fileDirs;
+        const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
+        const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
+        const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
+        const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+        mkdir(outDirPath);
+        mkdir(audioSegmentDirPath);
+        graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+        const graph = new GraphAI(graph_data, {
+            ...vanillaAgents,
+            fileWriteAgent,
+            ttsOpenaiAgent,
+            ttsNijivoiceAgent,
+            ttsGoogleAgent,
+            addBGMAgent,
+            combineAudioFilesAgent,
+        }, { agentFilters });
+        graph.injectValue("context", context);
+        graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
+        graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
+        graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+        graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
+        graph.injectValue("audioDirPath", audioDirPath);
+        await graph.run();
+        writingMessage(audioCombinedFilePath);
+    }
+    finally {
+        MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+    }
 };
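The audio action's body is unchanged in substance; it is now wrapped in try/finally so the studio is flagged while audio generation is in flight and reliably unflagged even if the run throws. A sketch of what MulmoStudioMethods plausibly does, assuming it keeps simple boolean flags on the studio object (the real lib/methods/mulmo_studio.js ships in this release but is not displayed in these hunks):

// Hypothetical shape of the session-state bookkeeping added in 0.0.7; the real
// module may also validate session types or notify observers.
const MulmoStudioMethods = {
  setSessionState(studio, sessionType, active) {
    studio.sessionState = studio.sessionState ?? { inSession: {}, inBeatSession: {} };
    studio.sessionState.inSession[sessionType] = active;
  },
  setBeatSessionState(studio, sessionType, index, active) {
    studio.sessionState = studio.sessionState ?? { inSession: {}, inBeatSession: {} };
    studio.sessionState.inBeatSession[sessionType] = studio.sessionState.inBeatSession[sessionType] ?? {};
    studio.sessionState.inBeatSession[sessionType][index] = active;
  },
};

// Usage mirroring the wrapper added to audio():
const studio = { filename: "demo" };
MulmoStudioMethods.setSessionState(studio, "audio", true);
try {
  // ... run the TTS graph ...
} finally {
  MulmoStudioMethods.setSessionState(studio, "audio", false);
}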
package/lib/actions/captions.js
CHANGED

@@ -2,6 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { getHTMLFile } from "../utils/file.js";
 import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
+import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const { default: __, ...vanillaAgents } = agents;
 const graph_data = {
     version: 0.5,
@@ -17,31 +18,37 @@
     },
     graph: {
         nodes: {
-
+            generateCaption: {
                 agent: async (namedInputs) => {
                     const { beat, context, index } = namedInputs;
-
-
-
-
-
-
-
- const
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    try {
+                        MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+                        const { fileDirs } = namedInputs.context;
+                        const { caption } = context;
+                        const { imageDirPath } = fileDirs;
+                        const { canvasSize } = context.studio.script;
+                        const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
+                        const template = getHTMLFile("caption");
+                        const text = (() => {
+                            const multiLingual = context.studio.multiLingual;
+                            if (caption && multiLingual) {
+                                return multiLingual[index].multiLingualTexts[caption].text;
+                            }
+                            GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
+                            return beat.text;
+                        })();
+                        const htmlData = interpolate(template, {
+                            caption: text,
+                            width: `${canvasSize.width}`,
+                            height: `${canvasSize.height}`,
+                        });
+                        await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
+                        context.studio.beats[index].captionFile = imagePath;
+                        return imagePath;
+                    }
+                    finally {
+                        MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+                    }
                 },
                 inputs: {
                     beat: ":beat",
@@ -56,7 +63,13 @@
     },
 };
 export const captions = async (context) => {
-
-
-
+    try {
+        MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+        const graph = new GraphAI(graph_data, { ...vanillaAgents });
+        graph.injectValue("context", context);
+        await graph.run();
+    }
+    finally {
+        MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+    }
 };
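Beyond the beat-level session flags, the interesting logic above is the caption-text selection: prefer the translation for the requested caption language, and fall back to the beat's own text with a warning otherwise. The same logic as the hunk, restated as a standalone function for readability:

import { GraphAILogger } from "graphai";

// Pick the caption text for one beat: the translated text when a caption
// language and multiLingual data are available, otherwise the original text.
const captionText = (studio, beat, index, caption) => {
  const multiLingual = studio.multiLingual;
  if (caption && multiLingual) {
    return multiLingual[index].multiLingualTexts[caption].text;
  }
  GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
  return beat.text;
};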
package/lib/actions/images.js
CHANGED

@@ -13,6 +13,7 @@ const { default: __, ...vanillaAgents } = agents;
 dotenv.config();
 // const openai = new OpenAI();
 import { GoogleAuth } from "google-auth-library";
+import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const htmlStyle = (script, beat) => {
     return {
         canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -30,10 +31,16 @@ const imagePreprocessAgent = async (namedInputs) => {
     if (beat.image) {
         const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
         if (plugin) {
-
-
-
-
+            try {
+                MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+                const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
+                const path = await plugin.process(processorParams);
+                // undefined prompt indicates that image generation is not needed
+                return { path, ...returnValue };
+            }
+            finally {
+                MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+            }
         }
     }
     const prompt = imagePrompt(beat, imageParams.style);
@@ -71,17 +78,21 @@ const graph_data = {
             imageGenerator: {
                 if: ":preprocessor.prompt",
                 agent: ":imageAgentInfo.agent",
-
-                model: ":preprocessor.imageParams.model",
-                size: ":preprocessor.imageParams.size",
-                moderation: ":preprocessor.imageParams.moderation",
-                aspectRatio: ":preprocessor.aspectRatio",
-                },
+                retry: 3,
                 inputs: {
                     prompt: ":preprocessor.prompt",
                     file: ":preprocessor.path", // only for fileCacheAgentFilter
                     text: ":preprocessor.prompt", // only for fileCacheAgentFilter
                     force: ":context.force",
+                    studio: ":context.studio", // for cache
+                    index: ":__mapIndex", // for cache
+                    sessionType: "image", // for cache
+                    params: {
+                        model: ":preprocessor.imageParams.model",
+                        size: ":preprocessor.imageParams.size",
+                        moderation: ":preprocessor.imageParams.moderation",
+                        aspectRatio: ":preprocessor.aspectRatio",
+                    },
                 },
                 defaultValue: {},
             },
@@ -133,7 +144,7 @@ const googleAuth = async () => {
     const accessToken = await client.getAccessToken();
     return accessToken.token;
 };
-
+const generateImages = async (context) => {
     const { studio, fileDirs } = context;
     const { outDirPath, imageDirPath } = fileDirs;
     mkdir(`${imageDirPath}/${studio.filename}`);
@@ -172,3 +183,12 @@ export const images = async (context) => {
     });
     await graph.run();
 };
+export const images = async (context) => {
+    try {
+        MulmoStudioMethods.setSessionState(context.studio, "image", true);
+        await generateImages(context);
+    }
+    finally {
+        MulmoStudioMethods.setSessionState(context.studio, "image", false);
+    }
+};
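The imageGenerator node now routes studio, index, and sessionType through its inputs alongside the existing file/text/force, all marked "for cache", moves the generation options under params, and gains retry: 3. A sketch of how a cache-aware agent filter might consume those inputs; this is a guess at the contract, not the actual fileCacheAgentFilter from lib/utils/filters.js (which this diff updates but does not display):

import fs from "fs";
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";

// Hypothetical cache filter: skip the wrapped agent when the target file
// already exists (unless forced), and flag the beat as in-progress otherwise.
const fileCacheAgentFilter = async (context, next) => {
  const { file, force, studio, index, sessionType } = context.namedInputs;
  if (!force && file && fs.existsSync(file)) {
    return { file }; // cache hit: no generation, no session flag
  }
  try {
    MulmoStudioMethods.setBeatSessionState(studio, sessionType, index, true);
    return await next(context);
  } finally {
    MulmoStudioMethods.setBeatSessionState(studio, sessionType, index, false);
  }
};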
package/lib/actions/movie.js
CHANGED

@@ -2,27 +2,28 @@ import { GraphAILogger } from "graphai";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
+import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
 export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
     const videoId = `v${inputIndex}`;
+    const videoFilters = [];
+    // Handle different media types
+    if (mediaType === "image") {
+        videoFilters.push("loop=loop=-1:size=1:start=0");
+    }
+    else if (mediaType === "movie") {
+        // For videos, extend with last frame if shorter than required duration
+        // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
+        videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
+    }
+    // Common filters for all media types
+    videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+    // In case of the aspect ratio mismatch, we fill the extra space with black color.
+    `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
     return {
         videoId,
-        videoPart: `[${inputIndex}:v]` +
-            [
-                mediaType === "image" ? "loop=loop=-1:size=1:start=0" : "",
-                `trim=duration=${duration}`,
-                "fps=30",
-                "setpts=PTS-STARTPTS",
-                `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
-                // In case of the aspect ratio mismatch, we fill the extra space with black color.
-                `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
-                "setsar=1",
-                "format=yuv420p",
-            ]
-                .filter((a) => a)
-                .join(",") +
-            `[${videoId}]`,
+        videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
     };
 };
 export const getAudioPart = (inputIndex, duration, delay) => {
@@ -121,12 +122,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
     await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
     const end = performance.now();
     GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
+    GraphAILogger.info(studio.script.title);
+    GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
 };
 export const movie = async (context) => {
-
-
-
-
-
-
+    MulmoStudioMethods.setSessionState(context.studio, "video", true);
+    try {
+        const { studio, fileDirs, caption } = context;
+        const { outDirPath } = fileDirs;
+        const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
+        const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
+        await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
+        writingMessage(outputVideoPath);
+    }
+    finally {
+        MulmoStudioMethods.setSessionState(context.studio, "video", false);
+    }
 };
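The refactor replaces the inline filter array with a videoFilters list so image beats (infinite loop of a still) and movie beats (tpad-extended last frame) can diverge before sharing the common trim/scale/pad tail. Reading the refactored code directly, it emits the same chain the old code built for stills; for example:

import { getVideoPart } from "mulmocast/lib/actions/movie.js"; // assumed import path

// A 3-second image beat on a 1280x720 canvas:
const { videoId, videoPart } = getVideoPart(0, "image", 3, { width: 1280, height: 720 });
console.log(videoId); // "v0"
console.log(videoPart);
// [0:v]loop=loop=-1:size=1:start=0,trim=duration=3,fps=30,setpts=PTS-STARTPTS,
//   scale=w=1280:h=720:force_original_aspect_ratio=decrease,
//   pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,setsar=1,format=yuv420p[v0]
// (a single line in reality; wrapped here for readability)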
package/lib/actions/pdf.js
CHANGED

@@ -6,6 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
 import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
+import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const imagesPerPage = 4;
 const offset = 10;
 const handoutImageRatio = 0.5;
@@ -189,7 +190,7 @@ const outputSize = (pdfSize, isLandscapeImage, isRotate) => {
     }
     return { width: 612, height: 792 };
 };
-
+const generatePdf = async (context, pdfMode, pdfSize) => {
     const { studio, fileDirs, lang } = context;
     const { multiLingual } = studio;
     const { outDirPath } = fileDirs;
@@ -219,3 +220,12 @@ export const pdf = async (context, pdfMode, pdfSize) => {
     fs.writeFileSync(outputPdfPath, pdfBytes);
     writingMessage(outputPdfPath);
 };
+export const pdf = async (context, pdfMode, pdfSize) => {
+    try {
+        MulmoStudioMethods.setSessionState(context.studio, "pdf", true);
+        await generatePdf(context, pdfMode, pdfSize);
+    }
+    finally {
+        MulmoStudioMethods.setSessionState(context.studio, "pdf", false);
+    }
+};
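pdf.js repeats the pattern seen in audio, captions, images, and movie: rename the body to a private generate* function and wrap the export in try/finally. Purely as an illustration (no such helper ships in 0.0.7), the repeated wrappers could collapse into one:

import { MulmoStudioMethods } from "../methods/mulmo_studio.js";

// Hypothetical helper generalizing the repeated try/finally session wrappers.
const withSessionState = async (studio, sessionType, callback) => {
  try {
    MulmoStudioMethods.setSessionState(studio, sessionType, true);
    return await callback();
  } finally {
    MulmoStudioMethods.setSessionState(studio, sessionType, false);
  }
};

// pdf() would then reduce to (generatePdf being the private function above):
export const pdf = (context, pdfMode, pdfSize) =>
  withSessionState(context.studio, "pdf", () => generatePdf(context, pdfMode, pdfSize));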
package/lib/actions/translate.js
CHANGED

@@ -6,6 +6,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
 import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
 import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
+import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
 const { default: __, ...vanillaAgents } = agents;
 const translateGraph = {
     version: 0.5,
@@ -62,6 +63,7 @@
                 rows: ":targetLangs",
                 lang: ":lang.text",
                 studio: ":studio",
+                beatIndex: ":__mapIndex",
             },
             params: {
                 compositeResult: true,
@@ -76,6 +78,8 @@
                 beat: ":beat", // for cache
                 multiLingual: ":multiLingual", // for cache
                 lang: ":lang", // for cache
+                beatIndex: ":beatIndex", // for cache
+                studio: ":studio", // for cache
                 system: translateSystemPrompt,
                 prompt: translatePrompts,
             },
@@ -171,7 +175,7 @@
 };
 const localizedTextCacheAgentFilter = async (context, next) => {
     const { namedInputs } = context;
-    const { targetLang, beat, lang, multiLingual } = namedInputs;
+    const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
     if (!beat.text) {
         return { text: "" };
     }
@@ -187,7 +191,13 @@
     if (targetLang === lang) {
         return { text: beat.text };
     }
-
+    try {
+        MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
+        return await next(context);
+    }
+    finally {
+        MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
+    }
 };
 const agentFilters = [
     {
@@ -199,21 +209,26 @@
 const defaultLang = "en";
 const targetLangs = ["ja", "en"];
 export const translate = async (context) => {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try {
+        MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
+        const { studio, fileDirs } = context;
+        const { outDirPath } = fileDirs;
+        const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+        mkdir(outDirPath);
+        assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
+        const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
+        graph.injectValue("studio", studio);
+        graph.injectValue("defaultLang", defaultLang);
+        graph.injectValue("targetLangs", targetLangs);
+        graph.injectValue("outDirPath", outDirPath);
+        graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+        const results = await graph.run();
+        writingMessage(outputStudioFilePath);
+        if (results.mergeStudioResult) {
+            context.studio = results.mergeStudioResult;
+        }
+    }
+    finally {
+        MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
     }
-    // console.log(JSON.stringify(results, null, 2));
 };
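translate.js threads GraphAI's map index through as beatIndex so the cache filter can flag exactly which beat is being translated. Condensed, the updated filter's decision order reads as below; this sketch omits the cached-translation lookup that sits between the language check and the LLM call (that part of the filter is unchanged and not shown in these hunks):

import { MulmoStudioMethods } from "../methods/mulmo_studio.js";

// 1. Empty beat text: nothing to translate.
// 2. Target language equals the source language: reuse the original text.
// 3. Otherwise: flag the beat's multiLingual session, run the LLM, unflag.
const localizedTextCacheAgentFilter = async (context, next) => {
  const { studio, targetLang, beat, beatIndex, lang } = context.namedInputs;
  if (!beat.text) {
    return { text: "" };
  }
  if (targetLang === lang) {
    return { text: beat.text };
  }
  try {
    MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
    return await next(context);
  } finally {
    MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
  }
};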
package/lib/agents/combine_audio_files_agent.js
CHANGED

@@ -9,19 +9,34 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
     const silentIds = context.studio.beats.map((_, index) => `[ls_${index}]`);
     ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
     const inputIds = (await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
+        const beat = context.studio.script.beats[index];
         const isClosingGap = index === context.studio.beats.length - 2;
         if (studioBeat.audioFile) {
             const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
             const padding = (() => {
+                if (beat.audioParams?.padding !== undefined) {
+                    return beat.audioParams.padding;
+                }
                 if (index === context.studio.beats.length - 1) {
                     return 0;
                 }
                 return isClosingGap ? context.studio.script.audioParams.closingPadding : context.studio.script.audioParams.padding;
             })();
-
-
+            const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
+            const totalPadding = await (async () => {
+                if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
+                    const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+                    const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
+                    if (movieDuration > audioDuration) {
+                        return padding + (movieDuration - audioDuration);
+                    }
+                }
+                return padding;
+            })();
+            studioBeat.duration = audioDuration + totalPadding;
+            if (totalPadding > 0) {
                 const silentId = silentIds.pop();
-                ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${padding}[padding_${index}]`);
+                ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${totalPadding}[padding_${index}]`);
                 return [audioId, `[padding_${index}]`];
             }
             else {
@@ -30,16 +45,12 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
         }
         else {
             // NOTE: We come here when the text is empty and no audio property is specified.
-            studioBeat.duration =
+            studioBeat.duration = beat.duration ?? 1.0;
             const silentId = silentIds.pop();
             ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${studioBeat.duration}[silent_${index}]`);
             return [`[silent_${index}]`];
         }
     }))).flat();
-    // HACK: Because the last beat may not use an silent audio, we need to consume it to make ffmpeg happy.
-    if (silentIds.length > 1) {
-        throw new Error("UNEXPECTED: silentIds.length > 1");
-    }
     silentIds.forEach((silentId) => {
         GraphAILogger.log(`Using extra silentId: ${silentId}`);
         ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${0.01}[silent_extra]`);