mulmocast 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +39 -2
  2. package/assets/templates/business.json +2 -181
  3. package/assets/templates/children_book.json +1 -128
  4. package/assets/templates/coding.json +2 -136
  5. package/assets/templates/comic_strips.json +1 -30
  6. package/assets/templates/ghibli_strips.json +1 -30
  7. package/assets/templates/sensei_and_taro.json +1 -118
  8. package/lib/actions/audio.js +41 -31
  9. package/lib/actions/captions.js +39 -26
  10. package/lib/actions/images.js +31 -11
  11. package/lib/actions/movie.js +30 -21
  12. package/lib/actions/pdf.js +11 -1
  13. package/lib/actions/translate.js +33 -18
  14. package/lib/agents/combine_audio_files_agent.js +19 -8
  15. package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
  16. package/lib/cli/commands/tool/scripting/builder.js +4 -4
  17. package/lib/cli/commands/tool/scripting/handler.d.ts +2 -1
  18. package/lib/cli/commands/tool/scripting/handler.js +3 -3
  19. package/lib/cli/commands/tool/story_to_script/builder.d.ts +3 -1
  20. package/lib/cli/commands/tool/story_to_script/builder.js +12 -4
  21. package/lib/cli/commands/tool/story_to_script/handler.d.ts +3 -1
  22. package/lib/cli/commands/tool/story_to_script/handler.js +4 -3
  23. package/lib/methods/mulmo_script_template.d.ts +2 -2
  24. package/lib/methods/mulmo_script_template.js +2 -2
  25. package/lib/methods/mulmo_studio.d.ts +8 -0
  26. package/lib/methods/mulmo_studio.js +24 -0
  27. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -1
  28. package/lib/tools/create_mulmo_script_from_url.js +7 -7
  29. package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
  30. package/lib/tools/create_mulmo_script_interactively.js +8 -8
  31. package/lib/tools/story_to_script.d.ts +5 -3
  32. package/lib/tools/story_to_script.js +90 -16
  33. package/lib/types/schema.d.ts +320 -1766
  34. package/lib/types/schema.js +41 -2
  35. package/lib/types/type.d.ts +4 -2
  36. package/lib/utils/const.d.ts +4 -0
  37. package/lib/utils/const.js +4 -0
  38. package/lib/utils/file.d.ts +1 -0
  39. package/lib/utils/file.js +16 -4
  40. package/lib/utils/filters.js +16 -11
  41. package/lib/utils/markdown.js +0 -2
  42. package/lib/utils/preprocess.d.ts +34 -15
  43. package/lib/utils/preprocess.js +3 -2
  44. package/lib/utils/prompt.d.ts +2 -1
  45. package/lib/utils/prompt.js +20 -3
  46. package/lib/utils/utils.d.ts +8 -5
  47. package/lib/utils/utils.js +27 -17
  48. package/package.json +2 -2
package/assets/templates/sensei_and_taro.json
@@ -2,122 +2,5 @@
   "title": "Student and Teacher",
   "description": "Interactive discussion between a student and teacher",
   "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
- "script": {
-   "$mulmocast": {
-     "version": "1.0",
-     "credit": "closing"
-   },
-   "title": "韓国の戒厳令とその日本への影響",
-   "description": "韓国で最近発令された戒厳令とその可能性のある影響について、また日本の憲法に関する考慮事項との類似点を含めた洞察に満ちた議論。",
-   "lang": "ja",
-   "speechParams": {
-     "provider": "nijivoice",
-     "speakers": {
-       "Announcer": {
-         "displayName": {
-           "ja": "アナウンサー"
-         },
-         "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62"
-       },
-       "Student": {
-         "displayName": {
-           "ja": "生徒"
-         },
-         "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
-       },
-       "Teacher": {
-         "displayName": {
-           "ja": "先生"
-         },
-         "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
-       }
-     }
-   },
-   "beats": [
-     {
-       "speaker": "Announcer",
-       "text": "今日は、韓国で起きた戒厳令について、太郎くんが先生に聞きます。"
-     },
-     {
-       "speaker": "Student",
-       "text": "先生、今日は韓国で起きた戒厳令のことを教えてもらえますか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "もちろんだよ、太郎くん。韓国で最近、大統領が「戒厳令」っていうのを突然宣言したんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "戒厳令ってなんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "簡単に言うと、国がすごく危ない状態にあるとき、軍隊を使って人々の自由を制限するためのものなんだ。たとえば、政治活動を禁止したり、人の集まりを取り締まったりするんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "それって怖いですね。なんでそんなことをしたんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "大統領は「国会がうまく機能していないから」と言っていたけど、実際には自分の立場を守るために使ったように見えるんだ。それで、軍隊が国会に突入して、議員たちを捕まえようとしたんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "ええっ!?国会議員を捕まえようとするなんて、すごく危ないことじゃないですか。"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "その通りだよ。もし軍隊が国会を占拠していたら、国会で戒厳令を解除することもできなかったかもしれない。つまり、大統領がずっと自分の好きなように国を支配できるようになってしまうんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "韓国ではどうなったんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "幸い、野党の議員や市民たちが急いで集まって抗議して、6時間後に戒厳令は解除されたんだ。でも、ほんの少しの違いで、韓国の民主主義が大きく傷つけられるところだったんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "それは大変なことですね…。日本ではそんなこと起きないんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "実はね、今、日本でも似たような話があるんだよ。自民党が「緊急事態宣言」を憲法に追加しようとしているんだ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "緊急事態宣言って、韓国の戒厳令と同じようなものなんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "似ている部分があるね。たとえば、総理大臣が「社会秩序の混乱の危険があるから」と言えば、特別な権限を使って国を動かすことができるんだ。法律と同じ力を持つ命令を出したり、地方自治体に指示を出したりすることができるんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "それって便利そうですけど、なんだか心配です。"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "そうだね。もちろん、緊急時には素早い対応が必要だから便利な面もあるけど、その権限が濫用されると、とても危険なんだ。たとえば、総理大臣が自分に都合のいいように国を動かしたり、国民の自由を奪ったりすることができるようになってしまうかもしれない。"
-     },
-     {
-       "speaker": "Student",
-       "text": "韓国みたいに、軍隊が政治に口を出してくることもあり得るんですか?"
-     },
-     {
-       "speaker": "Teacher",
-       "text": "完全にあり得ないとは言えないからこそ、注意が必要なんだ。私たち国民は、自民党の改憲案が権力の濫用を防ぐための適切な制限を含んでいるのかをしっかり監視し、声を上げることが求められる。民主主義が損なわれるのを防ぐために、私たち一人ひとりが積極的に関心を持つことが大切なんだよ。"
-     },
-     {
-       "speaker": "Student",
-       "text": "ありがとうございます。とても良い勉強になりました。"
-     },
-     {
-       "speaker": "Announcer",
-       "text": "ご視聴、ありがとうございました。次回の放送もお楽しみに。"
-     }
-   ]
- }
+ "scriptName": "sensei_and_taro.json"
  }
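
Net effect: the bundled template no longer inlines a full sample script; it points at a standalone script file via the new scriptName field. The resulting template (systemPrompt value unchanged, elided here) is just:

{
  "title": "Student and Teacher",
  "description": "Interactive discussion between a student and teacher",
  "systemPrompt": "この件について、… (unchanged, elided)",
  "scriptName": "sensei_and_taro.json"
}

Judging by the files-changed list (+1/+2 against -30 to -181 lines each), the other templates (business.json, children_book.json, coding.json, comic_strips.json, ghibli_strips.json) appear to shrink the same way.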
package/lib/actions/audio.js
@@ -11,6 +11,7 @@ import { MulmoScriptMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const { default: __, ...vanillaAgents } = agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -73,11 +74,14 @@ const graph_tts = {
          text: ":preprocessor.text",
          file: ":preprocessor.audioPath",
          force: ":context.force",
-       },
-       params: {
-         voice: ":preprocessor.voiceId",
-         speed: ":preprocessor.speechOptions.speed",
-         instructions: ":preprocessor.speechOptions.instruction",
+         studio: ":context.studio", // for cache
+         index: ":__mapIndex", // for cache
+         sessionType: "audio", // for cache
+         params: {
+           voice: ":preprocessor.voiceId",
+           speed: ":preprocessor.speechOptions.speed",
+           instructions: ":preprocessor.speechOptions.instruction",
+         },
        },
      },
    },
@@ -157,30 +161,36 @@ const agentFilters = [
    },
  ];
  export const audio = async (context) => {
-   const { studio, fileDirs, lang } = context;
-   const { outDirPath, audioDirPath } = fileDirs;
-   const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
-   const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
-   const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
-   const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
-   mkdir(outDirPath);
-   mkdir(audioSegmentDirPath);
-   graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
-   const graph = new GraphAI(graph_data, {
-     ...vanillaAgents,
-     fileWriteAgent,
-     ttsOpenaiAgent,
-     ttsNijivoiceAgent,
-     ttsGoogleAgent,
-     addBGMAgent,
-     combineAudioFilesAgent,
-   }, { agentFilters });
-   graph.injectValue("context", context);
-   graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
-   graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
-   graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-   graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
-   graph.injectValue("audioDirPath", audioDirPath);
-   await graph.run();
-   writingMessage(audioCombinedFilePath);
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+     const { studio, fileDirs, lang } = context;
+     const { outDirPath, audioDirPath } = fileDirs;
+     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
+     const audioSegmentDirPath = getAudioSegmentDirPath(audioDirPath, studio.filename);
+     const audioCombinedFilePath = getAudioCombinedFilePath(audioDirPath, studio.filename, lang);
+     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+     mkdir(outDirPath);
+     mkdir(audioSegmentDirPath);
+     graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+     const graph = new GraphAI(graph_data, {
+       ...vanillaAgents,
+       fileWriteAgent,
+       ttsOpenaiAgent,
+       ttsNijivoiceAgent,
+       ttsGoogleAgent,
+       addBGMAgent,
+       combineAudioFilesAgent,
+     }, { agentFilters });
+     graph.injectValue("context", context);
+     graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
+     graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
+     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+     graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
+     graph.injectValue("audioDirPath", audioDirPath);
+     await graph.run();
+     writingMessage(audioCombinedFilePath);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+   }
  };
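
Every action in this release gains the same try/finally bookkeeping around MulmoStudioMethods.setSessionState and setBeatSessionState (the same pattern recurs in captions, images, movie, pdf, and translate below). The new lib/methods/mulmo_studio.js (+24 lines) is not shown in this diff; a minimal sketch of what these helpers plausibly do, assuming the state is kept as flags on the studio object:

// Hypothetical sketch -- the real lib/methods/mulmo_studio.js may differ.
export const MulmoStudioMethods = {
  // Marks a whole session (audio, image, video, pdf, multiLingual, caption) as running.
  setSessionState(studio, sessionType, value) {
    studio.state = studio.state ?? { inSession: {}, inBeatSession: {} };
    studio.state.inSession[sessionType] = value;
  },
  // Marks one beat within a session as running, e.g. image generation for beat 3.
  setBeatSessionState(studio, sessionType, index, value) {
    studio.state = studio.state ?? { inSession: {}, inBeatSession: {} };
    const flags = (studio.state.inBeatSession[sessionType] ??= {});
    flags[index] = value;
  },
};

The try/finally wrapping guarantees the flag is cleared even when the underlying graph run throws.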
package/lib/actions/captions.js
@@ -2,6 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const { default: __, ...vanillaAgents } = agents;
  const graph_data = {
    version: 0.5,
@@ -17,31 +18,37 @@ const graph_data = {
    },
    graph: {
      nodes: {
-       test: {
+       generateCaption: {
          agent: async (namedInputs) => {
            const { beat, context, index } = namedInputs;
-           const { fileDirs } = namedInputs.context;
-           const { caption } = context;
-           const { imageDirPath } = fileDirs;
-           const { canvasSize } = context.studio.script;
-           const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
-           const template = getHTMLFile("caption");
-           const text = (() => {
-             const multiLingual = context.studio.multiLingual;
-             if (caption && multiLingual) {
-               return multiLingual[index].multiLingualTexts[caption].text;
-             }
-             GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
-             return beat.text;
-           })();
-           const htmlData = interpolate(template, {
-             caption: text,
-             width: `${canvasSize.width}`,
-             height: `${canvasSize.height}`,
-           });
-           await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
-           context.studio.beats[index].captionFile = imagePath;
-           return imagePath;
+           try {
+             MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+             const { fileDirs } = namedInputs.context;
+             const { caption } = context;
+             const { imageDirPath } = fileDirs;
+             const { canvasSize } = context.studio.script;
+             const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
+             const template = getHTMLFile("caption");
+             const text = (() => {
+               const multiLingual = context.studio.multiLingual;
+               if (caption && multiLingual) {
+                 return multiLingual[index].multiLingualTexts[caption].text;
+               }
+               GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${caption}`);
+               return beat.text;
+             })();
+             const htmlData = interpolate(template, {
+               caption: text,
+               width: `${canvasSize.width}`,
+               height: `${canvasSize.height}`,
+             });
+             await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
+             context.studio.beats[index].captionFile = imagePath;
+             return imagePath;
+           }
+           finally {
+             MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+           }
          },
          inputs: {
            beat: ":beat",
@@ -56,7 +63,13 @@ const graph_data = {
    },
  };
  export const captions = async (context) => {
-   const graph = new GraphAI(graph_data, { ...vanillaAgents });
-   graph.injectValue("context", context);
-   await graph.run();
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+     const graph = new GraphAI(graph_data, { ...vanillaAgents });
+     graph.injectValue("context", context);
+     await graph.run();
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+   }
  };
package/lib/actions/images.js
@@ -13,6 +13,7 @@ const { default: __, ...vanillaAgents } = agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const htmlStyle = (script, beat) => {
    return {
      canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -30,10 +31,16 @@ const imagePreprocessAgent = async (namedInputs) => {
    if (beat.image) {
      const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
      if (plugin) {
-       const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
-       const path = await plugin.process(processorParams);
-       // undefined prompt indicates that image generation is not needed
-       return { path, ...returnValue };
+       try {
+         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+         const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
+         const path = await plugin.process(processorParams);
+         // undefined prompt indicates that image generation is not needed
+         return { path, ...returnValue };
+       }
+       finally {
+         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+       }
      }
    }
    const prompt = imagePrompt(beat, imageParams.style);
@@ -71,17 +78,21 @@ const graph_data = {
      imageGenerator: {
        if: ":preprocessor.prompt",
        agent: ":imageAgentInfo.agent",
-       params: {
-         model: ":preprocessor.imageParams.model",
-         size: ":preprocessor.imageParams.size",
-         moderation: ":preprocessor.imageParams.moderation",
-         aspectRatio: ":preprocessor.aspectRatio",
-       },
+       retry: 3,
        inputs: {
          prompt: ":preprocessor.prompt",
          file: ":preprocessor.path", // only for fileCacheAgentFilter
          text: ":preprocessor.prompt", // only for fileCacheAgentFilter
          force: ":context.force",
+         studio: ":context.studio", // for cache
+         index: ":__mapIndex", // for cache
+         sessionType: "image", // for cache
+         params: {
+           model: ":preprocessor.imageParams.model",
+           size: ":preprocessor.imageParams.size",
+           moderation: ":preprocessor.imageParams.moderation",
+           aspectRatio: ":preprocessor.aspectRatio",
+         },
        },
        defaultValue: {},
      },
@@ -133,7 +144,7 @@ const googleAuth = async () => {
    const accessToken = await client.getAccessToken();
    return accessToken.token;
  };
- export const images = async (context) => {
+ const generateImages = async (context) => {
    const { studio, fileDirs } = context;
    const { outDirPath, imageDirPath } = fileDirs;
    mkdir(`${imageDirPath}/${studio.filename}`);
@@ -172,3 +183,12 @@ export const images = async (context) => {
    });
    await graph.run();
  };
+ export const images = async (context) => {
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "image", true);
+     await generateImages(context);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "image", false);
+   }
+ };
package/lib/actions/movie.js
@@ -2,27 +2,28 @@ import { GraphAILogger } from "graphai";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
    const videoId = `v${inputIndex}`;
+   const videoFilters = [];
+   // Handle different media types
+   if (mediaType === "image") {
+     videoFilters.push("loop=loop=-1:size=1:start=0");
+   }
+   else if (mediaType === "movie") {
+     // For videos, extend with last frame if shorter than required duration
+     // tpad will extend the video by cloning the last frame, then trim will ensure exact duration
+     videoFilters.push(`tpad=stop_mode=clone:stop_duration=${duration * 2}`); // Use 2x duration to ensure coverage
+   }
+   // Common filters for all media types
+   videoFilters.push(`trim=duration=${duration}`, "fps=30", "setpts=PTS-STARTPTS", `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
+   // In case of the aspect ratio mismatch, we fill the extra space with black color.
+   `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`, "setsar=1", "format=yuv420p");
    return {
      videoId,
-     videoPart: `[${inputIndex}:v]` +
-       [
-         mediaType === "image" ? "loop=loop=-1:size=1:start=0" : "",
-         `trim=duration=${duration}`,
-         "fps=30",
-         "setpts=PTS-STARTPTS",
-         `scale=w=${canvasInfo.width}:h=${canvasInfo.height}:force_original_aspect_ratio=decrease`,
-         // In case of the aspect ratio mismatch, we fill the extra space with black color.
-         `pad=${canvasInfo.width}:${canvasInfo.height}:(ow-iw)/2:(oh-ih)/2:color=black`,
-         "setsar=1",
-         "format=yuv420p",
-       ]
-         .filter((a) => a)
-         .join(",") +
-       `[${videoId}]`,
+     videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
    };
  };
  export const getAudioPart = (inputIndex, duration, delay) => {
@@ -121,12 +122,20 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
    await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
    const end = performance.now();
    GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
+   GraphAILogger.info(studio.script.title);
+   GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
  };
  export const movie = async (context) => {
-   const { studio, fileDirs, caption } = context;
-   const { outDirPath } = fileDirs;
-   const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
-   const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
-   await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
-   writingMessage(outputVideoPath);
+   MulmoStudioMethods.setSessionState(context.studio, "video", true);
+   try {
+     const { studio, fileDirs, caption } = context;
+     const { outDirPath } = fileDirs;
+     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
+     const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
+     await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
+     writingMessage(outputVideoPath);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "video", false);
+   }
  };
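
For illustration, with inputIndex 0, a 6-second beat, and a 1280x720 canvas (example values), the rewritten getVideoPart emits a chain like the following for a movie source; tpad first clones the last frame out to 12 s (2x the beat duration), and trim then cuts back to exactly 6 s, so sources shorter than the beat freeze on their last frame instead of ending early:

[0:v]tpad=stop_mode=clone:stop_duration=12,trim=duration=6,fps=30,setpts=PTS-STARTPTS,scale=w=1280:h=720:force_original_aspect_ratio=decrease,pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,setsar=1,format=yuv420p[v0]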
package/lib/actions/pdf.js
@@ -6,6 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const imagesPerPage = 4;
  const offset = 10;
  const handoutImageRatio = 0.5;
@@ -189,7 +190,7 @@ const outputSize = (pdfSize, isLandscapeImage, isRotate) => {
    }
    return { width: 612, height: 792 };
  };
- export const pdf = async (context, pdfMode, pdfSize) => {
+ const generatePdf = async (context, pdfMode, pdfSize) => {
    const { studio, fileDirs, lang } = context;
    const { multiLingual } = studio;
    const { outDirPath } = fileDirs;
@@ -219,3 +220,12 @@ export const pdf = async (context, pdfMode, pdfSize) => {
    fs.writeFileSync(outputPdfPath, pdfBytes);
    writingMessage(outputPdfPath);
  };
+ export const pdf = async (context, pdfMode, pdfSize) => {
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "pdf", true);
+     await generatePdf(context, pdfMode, pdfSize);
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "pdf", false);
+   }
+ };
package/lib/actions/translate.js
@@ -6,6 +6,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
+ import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const { default: __, ...vanillaAgents } = agents;
  const translateGraph = {
    version: 0.5,
@@ -62,6 +63,7 @@ const translateGraph = {
      rows: ":targetLangs",
      lang: ":lang.text",
      studio: ":studio",
+     beatIndex: ":__mapIndex",
    },
    params: {
      compositeResult: true,
@@ -76,6 +78,8 @@ const translateGraph = {
      beat: ":beat", // for cache
      multiLingual: ":multiLingual", // for cache
      lang: ":lang", // for cache
+     beatIndex: ":beatIndex", // for cache
+     studio: ":studio", // for cache
      system: translateSystemPrompt,
      prompt: translatePrompts,
    },
@@ -171,7 +175,7 @@ const translateGraph = {
  };
  const localizedTextCacheAgentFilter = async (context, next) => {
    const { namedInputs } = context;
-   const { targetLang, beat, lang, multiLingual } = namedInputs;
+   const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
    if (!beat.text) {
      return { text: "" };
    }
@@ -187,7 +191,13 @@ const localizedTextCacheAgentFilter = async (context, next) => {
    if (targetLang === lang) {
      return { text: beat.text };
    }
-   return await next(context);
+   try {
+     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
+     return await next(context);
+   }
+   finally {
+     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
+   }
  };
  const agentFilters = [
    {
@@ -199,21 +209,26 @@ const agentFilters = [
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
  export const translate = async (context) => {
-   const { studio, fileDirs } = context;
-   const { outDirPath } = fileDirs;
-   const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
-   mkdir(outDirPath);
-   assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
-   const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
-   graph.injectValue("studio", studio);
-   graph.injectValue("defaultLang", defaultLang);
-   graph.injectValue("targetLangs", targetLangs);
-   graph.injectValue("outDirPath", outDirPath);
-   graph.injectValue("outputStudioFilePath", outputStudioFilePath);
-   const results = await graph.run();
-   writingMessage(outputStudioFilePath);
-   if (results.mergeStudioResult) {
-     context.studio = results.mergeStudioResult;
+   try {
+     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
+     const { studio, fileDirs } = context;
+     const { outDirPath } = fileDirs;
+     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
+     mkdir(outDirPath);
+     assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
+     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
+     graph.injectValue("studio", studio);
+     graph.injectValue("defaultLang", defaultLang);
+     graph.injectValue("targetLangs", targetLangs);
+     graph.injectValue("outDirPath", outDirPath);
+     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+     const results = await graph.run();
+     writingMessage(outputStudioFilePath);
+     if (results.mergeStudioResult) {
+       context.studio = results.mergeStudioResult;
+     }
+   }
+   finally {
+     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
    }
-   // console.log(JSON.stringify(results, null, 2));
  };
package/lib/agents/combine_audio_files_agent.js
@@ -9,19 +9,34 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const silentIds = context.studio.beats.map((_, index) => `[ls_${index}]`);
    ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
    const inputIds = (await Promise.all(context.studio.beats.map(async (studioBeat, index) => {
+     const beat = context.studio.script.beats[index];
      const isClosingGap = index === context.studio.beats.length - 2;
      if (studioBeat.audioFile) {
        const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
        const padding = (() => {
+         if (beat.audioParams?.padding !== undefined) {
+           return beat.audioParams.padding;
+         }
          if (index === context.studio.beats.length - 1) {
            return 0;
          }
          return isClosingGap ? context.studio.script.audioParams.closingPadding : context.studio.script.audioParams.padding;
        })();
-       studioBeat.duration = (await ffmpegGetMediaDuration(studioBeat.audioFile)) + padding;
-       if (padding > 0) {
+       const audioDuration = await ffmpegGetMediaDuration(studioBeat.audioFile);
+       const totalPadding = await (async () => {
+         if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
+           const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+           const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
+           if (movieDuration > audioDuration) {
+             return padding + (movieDuration - audioDuration);
+           }
+         }
+         return padding;
+       })();
+       studioBeat.duration = audioDuration + totalPadding;
+       if (totalPadding > 0) {
          const silentId = silentIds.pop();
-         ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${padding}[padding_${index}]`);
+         ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${totalPadding}[padding_${index}]`);
          return [audioId, `[padding_${index}]`];
        }
        else {
@@ -30,16 +45,12 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
        }
      }
      else {
        // NOTE: We come here when the text is empty and no audio property is specified.
-       studioBeat.duration = context.studio.script.beats[index].duration ?? 1.0;
+       studioBeat.duration = beat.duration ?? 1.0;
        const silentId = silentIds.pop();
        ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${studioBeat.duration}[silent_${index}]`);
        return [`[silent_${index}]`];
      }
    }))).flat();
-   // HACK: Because the last beat may not use an silent audio, we need to consume it to make ffmpeg happy.
-   if (silentIds.length > 1) {
-     throw new Error("UNEXPECTED: silentIds.length > 1");
-   }
    silentIds.forEach((silentId) => {
      GraphAILogger.log(`Using extra silentId: ${silentId}`);
      ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${0.01}[silent_extra]`);
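
A worked example of the new duration logic, with illustrative numbers: a beat with a 3.0 s narration, a movie image source of 5.5 s, and a script-level audioParams.padding of 0.3 s gets totalPadding = 0.3 + (5.5 - 3.0) = 2.8 s, so studioBeat.duration = 3.0 + 2.8 = 5.8 s and the audio track is padded with silence until the movie finishes. A beat-level beat.audioParams.padding, when present, overrides the script-level value before this movie-length adjustment.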
package/lib/cli/commands/tool/scripting/builder.d.ts
@@ -14,7 +14,7 @@ export declare const builder: (yargs: Argv) => Argv<{
  } & {
    s: string;
  } & {
-   llm_agent: string | undefined;
+   llm: "openAI" | "anthropic" | "gemini" | "groq" | undefined;
  } & {
    llm_model: string | undefined;
  }>;
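
The scripting and story_to_script commands therefore now constrain the LLM back end to a fixed choice list ("openAI" | "anthropic" | "gemini" | "groq") instead of the old free-form llm_agent string. A hypothetical invocation, assuming the binary name and flag wiring follow the command paths above (this diff only shows the type declarations, not the full CLI setup):

mulmocast tool story_to_script -s story.json --llm openAI --llm_model gpt-4o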