mulmocast 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +1 -3
  2. package/assets/templates/ghibli_shorts.json +34 -0
  3. package/assets/templates/trailer.json +25 -0
  4. package/lib/actions/audio.js +29 -16
  5. package/lib/actions/captions.js +5 -5
  6. package/lib/actions/images.js +51 -12
  7. package/lib/actions/movie.js +46 -13
  8. package/lib/actions/pdf.js +3 -3
  9. package/lib/actions/translate.js +15 -15
  10. package/lib/agents/image_openai_agent.js +6 -3
  11. package/lib/agents/index.d.ts +2 -1
  12. package/lib/agents/index.js +2 -1
  13. package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
  14. package/lib/agents/tts_elevenlabs_agent.js +60 -0
  15. package/lib/agents/tts_google_agent.js +1 -1
  16. package/lib/agents/tts_nijivoice_agent.js +3 -2
  17. package/lib/agents/tts_openai_agent.js +1 -1
  18. package/lib/cli/commands/audio/handler.js +4 -1
  19. package/lib/cli/commands/image/handler.js +4 -1
  20. package/lib/cli/commands/movie/handler.js +4 -1
  21. package/lib/cli/commands/pdf/handler.js +4 -1
  22. package/lib/cli/commands/translate/handler.js +4 -1
  23. package/lib/cli/helpers.d.ts +3 -3
  24. package/lib/cli/helpers.js +38 -20
  25. package/lib/methods/mulmo_media_source.d.ts +1 -0
  26. package/lib/methods/mulmo_media_source.js +12 -0
  27. package/lib/methods/mulmo_script.d.ts +1 -0
  28. package/lib/methods/mulmo_script.js +9 -0
  29. package/lib/methods/mulmo_studio_context.d.ts +5 -0
  30. package/lib/methods/mulmo_studio_context.js +23 -0
  31. package/lib/types/schema.d.ts +1498 -242
  32. package/lib/types/schema.js +25 -34
  33. package/lib/types/type.d.ts +4 -1
  34. package/lib/utils/file.d.ts +4 -15
  35. package/lib/utils/file.js +2 -13
  36. package/lib/utils/filters.js +4 -4
  37. package/lib/utils/image_plugins/beat.d.ts +4 -0
  38. package/lib/utils/image_plugins/beat.js +7 -0
  39. package/lib/utils/image_plugins/index.d.ts +2 -1
  40. package/lib/utils/image_plugins/index.js +2 -1
  41. package/lib/utils/image_plugins/source.js +2 -2
  42. package/lib/utils/preprocess.d.ts +24 -20
  43. package/lib/utils/preprocess.js +4 -0
  44. package/package.json +1 -1
  45. package/scripts/templates/movie_prompts_no_text_template.json +50 -0
package/README.md CHANGED
@@ -101,9 +101,7 @@ DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
  GOOGLE_PROJECT_ID=your_google_project_id
  ```
 
- You may also need to take the following steps before running any commands:
- 1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
- 2. Login by `gcloud auth application-default login`
+ See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 
  #### (Optional) For Nijivoice's TTS model
  ```bash
package/assets/templates/ghibli_shorts.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "title": "Ghibli comic style",
+   "description": "Template for Ghibli-style comic presentation.",
+   "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Presenter": { "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62", "speechOptions": { "speed": 1.5 } }
+       }
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/trailer.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "title": "Movie Trailer template",
+   "description": "Template for A Movie Trailer.",
+   "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0"
+     },
+     "canvasSize": {
+       "width": 1280,
+       "height": 720
+     },
+     "imageParams": {
+       "style": "<style>Photo realistic, cinematic.</style>"
+     },
+     "audioParams": {
+       "padding": 0.0,
+       "introPadding": 0.0,
+       "closingPadding": 0.0,
+       "outroPadding": 2.5,
+       "bgm": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/bgms/trailer_dramatic.mp3" }
+     }
+   },
+   "scriptName": "movie_prompts_no_text_template.json"
+ }
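The `bgm` entry above is a media source of kind `url`. Below is a minimal sketch of how such a source can be resolved to a playable location; it is not the package's implementation (lib/methods/mulmo_media_source.js changes in this release, but its body is not part of this diff), and the non-URL branch is deliberately left open.

```js
// Minimal sketch only; the real resolver lives in lib/methods/mulmo_media_source.js,
// whose body is not shown in this diff.
const resolveMediaSource = (source) => {
  if (!source) {
    return undefined;
  }
  if (source.kind === "url") {
    return source.url; // e.g. the trailer_dramatic.mp3 URL above
  }
  return undefined; // other kinds (e.g. local paths) would be handled here
};

console.log(resolveMediaSource({ kind: "url", url: "https://example.com/bgm.mp3" }));
```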
package/lib/actions/audio.js CHANGED
@@ -6,12 +6,14 @@ import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
  import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
+ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
+ import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+ import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
  const vanillaAgents = agents.default ?? agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -19,10 +21,11 @@ const provider_to_agent = {
    nijivoice: "ttsNijivoiceAgent",
    openai: "ttsOpenaiAgent",
    google: "ttsGoogleAgent",
+   elevenlabs: "ttsElevenlabsAgent",
  };
  const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    if (beat.audio?.type === "audio") {
-     const path = resolveMediaSource(beat.audio.source, context);
+     const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
      if (path) {
        return path;
      }
@@ -34,18 +37,21 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
  };
  const preprocessor = (namedInputs) => {
-   const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
+   const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
    const { lang } = context;
-   const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
+   const speaker = context.studio.script.speechParams.speakers[beat.speaker];
+   const voiceId = speaker.voiceId;
    const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
    const text = localizedText(beat, multiLingual, lang);
-   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
-   const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
+   // Use speaker-specific provider if available, otherwise fall back to script-level provider
+   const provider = speaker.provider ?? context.studio.script.speechParams.provider;
+   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
+   const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
    const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
    studioBeat.audioFile = audioPath;
    const needsTTS = !beat.audio && audioPath !== undefined;
    return {
-     ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
+     ttsAgent: provider_to_agent[provider],
      studioBeat,
      voiceId,
      speechOptions,
@@ -62,7 +68,6 @@ const graph_tts = {
      beat: ":beat",
      studioBeat: ":studioBeat",
      multiLingual: ":multiLingual",
-     index: ":__mapIndex",
      context: ":context",
      audioDirPath: ":audioDirPath",
    },
@@ -74,7 +79,7 @@ const graph_tts = {
      text: ":preprocessor.text",
      file: ":preprocessor.audioPath",
      force: ":context.force",
-     studio: ":context.studio", // for cache
+     mulmoContext: ":context", // for cache
      index: ":__mapIndex", // for cache
      sessionType: "audio", // for cache
      params: {
@@ -96,6 +101,7 @@ const graph_data = {
    outputStudioFilePath: {},
    audioDirPath: {},
    audioSegmentDirPath: {},
+   musicFile: {},
    map: {
      agent: "mapAgent",
      inputs: {
@@ -130,14 +136,14 @@ const graph_data = {
    },
    addBGM: {
      agent: "addBGMAgent",
-     params: {
-       musicFile: process.env.PATH_BGM ?? defaultBGMPath,
-     },
      inputs: {
        wait: ":combineFiles",
        voiceFile: ":audioCombinedFilePath",
        outputFile: ":audioArtifactFilePath",
        script: ":context.studio.script",
+       params: {
+         musicFile: ":musicFile",
+       },
      },
      isResult: true,
    },
@@ -162,7 +168,7 @@ const agentFilters = [
  ];
  export const audio = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+     MulmoStudioContextMethods.setSessionState(context, "audio", true);
      const { studio, fileDirs, lang } = context;
      const { outDirPath, audioDirPath } = fileDirs;
      const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -171,13 +177,19 @@ export const audio = async (context, callbacks) => {
      const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
      mkdir(outDirPath);
      mkdir(audioSegmentDirPath);
-     graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+     const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
+       const provider = speaker.provider ?? studio.script.speechParams.provider;
+       return provider === "nijivoice" || provider === "elevenlabs";
+     });
+     graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
      const graph = new GraphAI(graph_data, {
        ...vanillaAgents,
        fileWriteAgent,
        ttsOpenaiAgent,
        ttsNijivoiceAgent,
        ttsGoogleAgent,
+       ttsElevenlabsAgent,
        addBGMAgent,
        combineAudioFilesAgent,
      }, { agentFilters });
@@ -187,6 +199,7 @@ export const audio = async (context, callbacks) => {
      graph.injectValue("outputStudioFilePath", outputStudioFilePath);
      graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
      graph.injectValue("audioDirPath", audioDirPath);
+     graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath);
      if (callbacks) {
        callbacks.forEach((callback) => {
          graph.registerCallback(callback);
@@ -196,6 +209,6 @@ export const audio = async (context, callbacks) => {
      writingMessage(audioCombinedFilePath);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+     MulmoStudioContextMethods.setSessionState(context, "audio", false);
    }
  };
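Two behavioral changes stand out in audio.js: each speaker may now carry its own TTS `provider` (falling back to the script-level one), and the presence of any rate-limited provider (nijivoice or elevenlabs) forces serial generation. A condensed, standalone sketch of that selection logic, with illustrative speaker data rather than the package's types:

```js
// Condensed sketch of the provider-selection logic in the diff above.
// The speechParams data here is illustrative, not from the package.
const provider_to_agent = {
  nijivoice: "ttsNijivoiceAgent",
  openai: "ttsOpenaiAgent",
  google: "ttsGoogleAgent",
  elevenlabs: "ttsElevenlabsAgent",
};

const speechParams = {
  provider: "openai", // script-level default
  speakers: {
    Presenter: { voiceId: "shimmer" }, // inherits "openai"
    Guest: { provider: "elevenlabs", voiceId: "abc123" }, // speaker-level override
  },
};

// Each beat resolves its agent from the speaker, falling back to the script default.
const agentFor = (speakerName) => {
  const speaker = speechParams.speakers[speakerName];
  return provider_to_agent[speaker.provider ?? speechParams.provider];
};

// Rate-limited providers force serial execution, exactly as in the diff above.
const limited = Object.values(speechParams.speakers).some((speaker) => {
  const provider = speaker.provider ?? speechParams.provider;
  return provider === "nijivoice" || provider === "elevenlabs";
});
const concurrency = limited ? 1 : 8;

console.log(agentFor("Guest"), concurrency); // "ttsElevenlabsAgent" 1
```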
package/lib/actions/captions.js CHANGED
@@ -2,7 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const graph_data = {
    version: 0.5,
@@ -22,7 +22,7 @@ const graph_data = {
      agent: async (namedInputs) => {
        const { beat, context, index } = namedInputs;
        try {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
          const { fileDirs } = namedInputs.context;
          const { caption } = context;
          const { imageDirPath } = fileDirs;
@@ -47,7 +47,7 @@ const graph_data = {
          return imagePath;
        }
        finally {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, false);
        }
      },
      inputs: {
@@ -64,12 +64,12 @@ const graph_data = {
  };
  export const captions = async (context) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+     MulmoStudioContextMethods.setSessionState(context, "caption", true);
      const graph = new GraphAI(graph_data, { ...vanillaAgents });
      graph.injectValue("context", context);
      await graph.run();
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+     MulmoStudioContextMethods.setSessionState(context, "caption", false);
    }
  };
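The recurring pattern in this and the following files is that session state is now set through `MulmoStudioContextMethods` on the whole context rather than `MulmoStudioMethods` on the studio, always inside try/finally so the flag is cleared even on error. A minimal sketch of the pattern with a stubbed `setSessionState` (the real one in lib/methods/mulmo_studio_context.js is not shown in this diff and presumably does more):

```js
// Minimal sketch of the try/finally session-state pattern; the bookkeeping is stubbed.
const sessionState = new Set();
const MulmoStudioContextMethods = {
  setSessionState: (context, session, active) => {
    active ? sessionState.add(session) : sessionState.delete(session);
  },
};

const withSession = async (context, session, fn) => {
  MulmoStudioContextMethods.setSessionState(context, session, true);
  try {
    return await fn();
  } finally {
    // finally guarantees the "in progress" flag is cleared even if fn throws
    MulmoStudioContextMethods.setSessionState(context, session, false);
  }
};

// Usage mirroring captions():
// await withSession(context, "caption", () => graph.run());
```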
package/lib/actions/images.js CHANGED
@@ -15,7 +15,6 @@ const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const htmlStyle = (script, beat) => {
    return {
      canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -34,14 +33,14 @@ const imagePreprocessAgent = async (namedInputs) => {
    const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
    if (plugin) {
      try {
-       MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
        const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
        const path = await plugin.process(processorParams);
        // undefined prompt indicates that image generation is not needed
        return { imagePath: path, ...returnValue };
      }
      finally {
-       MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+       MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
      }
    }
  }
@@ -104,7 +103,7 @@ const graph_data = {
      file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
      text: ":preprocessor.prompt", // only for fileCacheAgentFilter
      force: ":context.force", // only for fileCacheAgentFilter
-     studio: ":context.studio", // for fileCacheAgentFilter
+     mulmoContext: ":context", // for fileCacheAgentFilter
      index: ":__mapIndex", // for fileCacheAgentFilter
      sessionType: "image", // for fileCacheAgentFilter
      params: {
@@ -134,13 +133,20 @@ const graph_data = {
      },
      defaultValue: {},
    },
-   output: {
+   onComplete: {
      agent: "copyAgent",
      inputs: {
-       onComplete: ":movieGenerator",
+       onComplete: ":movieGenerator", // to wait for movieGenerator to finish
        imageFile: ":preprocessor.imagePath",
        movieFile: ":preprocessor.movieFile",
      },
+   },
+   output: {
+     agent: "copyAgent",
+     inputs: {
+       imageFile: ":onComplete.imageFile",
+       movieFile: ":onComplete.movieFile",
+     },
      isResult: true,
    },
  },
@@ -150,11 +156,26 @@
    agent: (namedInputs) => {
      const { array, context } = namedInputs;
      const { studio } = context;
+     const beatIndexMap = {};
      array.forEach((update, index) => {
        const beat = studio.beats[index];
        studio.beats[index] = { ...beat, ...update };
+       const id = studio.script.beats[index].id;
+       if (id) {
+         beatIndexMap[id] = index;
+       }
+     });
+     studio.beats.forEach((studioBeat, index) => {
+       const beat = studio.script.beats[index];
+       if (beat.image?.type === "beat") {
+         if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
+           studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+         }
+         else if (index > 0) {
+           studioBeat.imageFile = studio.beats[index - 1].imageFile;
+         }
+       }
      });
-     // console.log(namedInputs);
      return { studio };
    },
    inputs: {
@@ -181,9 +202,9 @@ const googleAuth = async () => {
      const accessToken = await client.getAccessToken();
      return accessToken.token;
    }
-   catch (__error) {
+   catch (error) {
      GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
-     process.exit(1);
+     throw error;
    }
  };
  const generateImages = async (context, callbacks) => {
@@ -236,7 +257,25 @@ const generateImages = async (context, callbacks) => {
        throw new Error(`Failed to download image: ${image.source.url}`);
      }
      const buffer = Buffer.from(await response.arrayBuffer());
-     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
+     // Detect file extension from Content-Type header or URL
+     const extension = (() => {
+       const contentType = response.headers.get("content-type");
+       if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
+         return "jpg";
+       }
+       else if (contentType?.includes("png")) {
+         return "png";
+       }
+       else {
+         // Fall back to URL extension
+         const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
+         if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
+           return urlExtension === "jpeg" ? "jpg" : urlExtension;
+         }
+         return "png"; // default
+       }
+     })();
+     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
      await fs.promises.writeFile(imagePath, buffer);
      imageRefs[key] = imagePath;
    }
@@ -263,10 +302,10 @@ const generateImages = async (context, callbacks) => {
  };
  export const images = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "image", true);
+     MulmoStudioContextMethods.setSessionState(context, "image", true);
      await generateImages(context, callbacks);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "image", false);
+     MulmoStudioContextMethods.setSessionState(context, "image", false);
    }
  };
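The download path above now derives the file extension from the `Content-Type` header first and the URL second, instead of hard-coding `.png`. The same logic, extracted into a standalone function for illustration:

```js
// Standalone restatement of the Content-Type-first extension detection added above.
const detectExtension = (contentType, url) => {
  if (contentType?.includes("jpeg") || contentType?.includes("jpg")) return "jpg";
  if (contentType?.includes("png")) return "png";
  // Fall back to the URL's extension, normalizing "jpeg" to "jpg".
  const urlExtension = url.split(".").pop()?.toLowerCase();
  if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
    return urlExtension === "jpeg" ? "jpg" : urlExtension;
  }
  return "png"; // default when neither header nor URL is conclusive
};

console.log(detectExtension("image/jpeg", "https://example.com/a"));   // "jpg"
console.log(detectExtension(null, "https://example.com/b.PNG"));       // "png"
console.log(detectExtension("application/octet-stream", "c.webp"));    // "png" (fallback)
```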
package/lib/actions/movie.js CHANGED
@@ -1,8 +1,9 @@
- import { GraphAILogger } from "graphai";
+ import { GraphAILogger, assert } from "graphai";
+ import { mulmoTransitionSchema } from "../types/index.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
@@ -38,10 +39,10 @@ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
      `[${audioId}]`,
    };
  };
- const getOutputOption = (audioId) => {
+ const getOutputOption = (audioId, videoId) => {
    return [
      "-preset medium", // Changed from veryfast to medium for better compression
-     "-map [v]", // Map the video stream
+     `-map [${videoId}]`, // Map the video stream
      `-map ${audioId}`, // Map the audio stream
      `-c:v ${videoCodec}`, // Set video codec
      ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
@@ -61,14 +62,17 @@ const getOutputOption = (audioId) => {
  const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
    const start = performance.now();
    const ffmpegContext = FfmpegContextInit();
-   if (studio.beats.some((beat) => !beat.imageFile && !beat.movieFile)) {
-     GraphAILogger.info("beat.imageFile or beat.movieFile is not set. Please run `yarn run images ${file}` ");
-     return;
+   const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+   if (missingIndex !== -1) {
+     GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
+     return false;
    }
    const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
    // Add each image input
    const filterComplexVideoIds = [];
    const filterComplexAudioIds = [];
+   const transitionVideoIds = [];
+   const beatTimestamps = [];
    studio.beats.reduce((timestamp, studioBeat, index) => {
      const beat = studio.script.beats[index];
      const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
@@ -102,16 +106,43 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
      else {
        filterComplexVideoIds.push(videoId);
      }
+     if (studio.script.movieParams?.transition && index < studio.beats.length - 1) {
+       const sourceId = filterComplexVideoIds.pop();
+       ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
+       filterComplexVideoIds.push(`${sourceId}_0`);
+       transitionVideoIds.push(`${sourceId}_1`);
+     }
      if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
        const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
        filterComplexAudioIds.push(audioId);
        ffmpegContext.filterComplex.push(audioPart);
      }
+     beatTimestamps.push(timestamp);
      return timestamp + duration;
    }, 0);
+   assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
+   assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
    // console.log("*** images", images.audioIds);
    // Concatenate the trimmed images
-   ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
+   const concatVideoId = "concat_video";
+   ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+   // Add tranditions if needed
+   const mixedVideoId = (() => {
+     if (studio.script.movieParams?.transition && transitionVideoIds.length > 1) {
+       const transition = mulmoTransitionSchema.parse(studio.script.movieParams.transition);
+       return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
+         const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
+         const processedVideoId = `${transitionVideoId}_f`;
+         // TODO: This mechanism does not work for video beats yet. It works only with image beats.
+         // If we can to add other transition types than fade, we need to add them here.
+         ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`);
+         const outputId = `${transitionVideoId}_o`;
+         ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+         return outputId;
+       }, concatVideoId);
+     }
+     return concatVideoId;
+   })();
    const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
    const artifactAudioId = `${audioIndex}:a`;
    const ffmpegContextAudioId = (() => {
@@ -125,23 +156,25 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
      }
      return artifactAudioId;
    })();
-   await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
+   await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
    const end = performance.now();
    GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
    GraphAILogger.info(studio.script.title);
    GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+   return true;
  };
  export const movie = async (context) => {
-   MulmoStudioMethods.setSessionState(context.studio, "video", true);
+   MulmoStudioContextMethods.setSessionState(context, "video", true);
    try {
      const { studio, fileDirs, caption } = context;
      const { outDirPath } = fileDirs;
      const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
      const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
-     await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
-     writingMessage(outputVideoPath);
+     if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+       writingMessage(outputVideoPath);
+     }
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "video", false);
+     MulmoStudioContextMethods.setSessionState(context, "video", false);
    }
  };
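The transition support above works by splitting each non-final beat's video stream, concatenating one copy as before, and overlaying the other copy with an alpha fade at the beat boundary. A sketch of the filtergraph strings this produces for two beats, using hypothetical stream labels (`v0`, `v1`); it only prints the filter expressions and does not invoke ffmpeg:

```js
// Illustrative only: the shape of the fade-transition filtergraph assembled above.
const transition = { type: "fade", duration: 0.5 }; // shape assumed from mulmoTransitionSchema
const beatTimestamps = [0, 4.0]; // beat 1 starts 4s in
const filterComplex = [];

// Split beat 0's stream: one copy feeds concat, the other becomes the fade overlay.
filterComplex.push("[v0]split=2[v0_0][v0_1]");
filterComplex.push("[v0_0][v1]concat=n=2:v=1:a=0[concat_video]");

// Start the fade slightly early (0.05s, per the anti-flicker comment in the diff).
const start = beatTimestamps[1] - 0.05;
filterComplex.push(`[v0_1]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${start}/TB[v0_1_f]`);
filterComplex.push(`[concat_video][v0_1_f]overlay=enable='between(t,${start},${start + transition.duration})'[v0_1_o]`);

console.log(filterComplex.join(";\n"));
```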
package/lib/actions/pdf.js CHANGED
@@ -6,7 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const imagesPerPage = 4;
  const offset = 10;
  const handoutImageRatio = 0.5;
@@ -224,10 +224,10 @@ const generatePdf = async (context, pdfMode, pdfSize) => {
  };
  export const pdf = async (context, pdfMode, pdfSize) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "pdf", true);
+     MulmoStudioContextMethods.setSessionState(context, "pdf", true);
      await generatePdf(context, pdfMode, pdfSize);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "pdf", false);
+     MulmoStudioContextMethods.setSessionState(context, "pdf", false);
    }
  };
package/lib/actions/translate.js CHANGED
@@ -6,19 +6,19 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const translateGraph = {
    version: 0.5,
    nodes: {
-     studio: {},
+     context: {},
      defaultLang: {},
      outDirPath: {},
      outputStudioFilePath: {},
      lang: {
        agent: "stringUpdateTextAgent",
        inputs: {
-         newText: ":studio.script.lang",
+         newText: ":context.studio.script.lang",
          oldText: ":defaultLang",
        },
      },
@@ -27,15 +27,15 @@ const translateGraph = {
      isResult: true,
      agent: "mergeObjectAgent",
      inputs: {
-       items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
+       items: [":context.studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
      },
    },
    beatsMap: {
      agent: "mapAgent",
      inputs: {
        targetLangs: ":targetLangs",
-       studio: ":studio",
-       rows: ":studio.script.beats",
+       context: ":context",
+       rows: ":context.studio.script.beats",
        lang: ":lang",
      },
      params: {
@@ -52,7 +52,7 @@ const translateGraph = {
      },
      inputs: {
        index: ":__mapIndex",
-       rows: ":studio.multiLingual",
+       rows: ":context.studio.multiLingual",
      },
    },
    preprocessMultiLingual: {
@@ -62,7 +62,7 @@ const translateGraph = {
        multiLingual: ":multiLingual",
        rows: ":targetLangs",
        lang: ":lang.text",
-       studio: ":studio",
+       context: ":context",
        beatIndex: ":__mapIndex",
      },
      params: {
@@ -79,7 +79,7 @@ const translateGraph = {
        multiLingual: ":multiLingual", // for cache
        lang: ":lang", // for cache
        beatIndex: ":beatIndex", // for cache
-       studio: ":studio", // for cache
+       mulmoContext: ":context", // for cache
        system: translateSystemPrompt,
        prompt: translatePrompts,
      },
@@ -175,7 +175,7 @@ const translateGraph = {
  };
  const localizedTextCacheAgentFilter = async (context, next) => {
    const { namedInputs } = context;
-   const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
+   const { mulmoContext, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
    if (!beat.text) {
      return { text: "" };
    }
@@ -192,11 +192,11 @@ const localizedTextCacheAgentFilter = async (context, next) => {
      return { text: beat.text };
    }
    try {
-     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
+     MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
      return await next(context);
    }
    finally {
-     MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
+     MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, false);
    }
  };
  const agentFilters = [
@@ -210,14 +210,14 @@ const defaultLang = "en";
  const targetLangs = ["ja", "en"];
  export const translate = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
+     MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
      const { studio, fileDirs } = context;
      const { outDirPath } = fileDirs;
      const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
      mkdir(outDirPath);
      assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
      const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
-     graph.injectValue("studio", studio);
+     graph.injectValue("context", context);
      graph.injectValue("defaultLang", defaultLang);
      graph.injectValue("targetLangs", targetLangs);
      graph.injectValue("outDirPath", outDirPath);
@@ -234,6 +234,6 @@ export const translate = async (context, callbacks) => {
    }
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
+     MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
    }
  };
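localizedTextCacheAgentFilter above is a GraphAI agent filter: a function receiving the agent context and a `next` continuation, free to do work before and after delegating. A minimal sketch of the same shape; the `{ name, agent }` registration format is assumed from the diff's `agentFilters` array, not verified against GraphAI's docs:

```js
// Minimal agent filter in the style of localizedTextCacheAgentFilter:
// wrap next() so per-beat work is bracketed by before/after bookkeeping.
const loggingAgentFilter = async (context, next) => {
  const { beatIndex } = context.namedInputs ?? {};
  console.log(`beat ${beatIndex}: start`);
  try {
    return await next(context); // run the wrapped agent (or the next filter)
  } finally {
    console.log(`beat ${beatIndex}: done`);
  }
};

// Registration, mirroring the diff's construction call (element shape assumed):
// new GraphAI(translateGraph, { ...agents }, { agentFilters: [{ name: "log", agent: loggingAgentFilter }] });
```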