mulmocast 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +20 -3
  2. package/assets/templates/akira_comic.json +2 -2
  3. package/assets/templates/drslump_comic.json +2 -2
  4. package/assets/templates/ghibli_comic.json +2 -2
  5. package/assets/templates/ghost_comic.json +2 -2
  6. package/assets/templates/onepiece_comic.json +2 -2
  7. package/assets/templates/portrait_movie.json +28 -0
  8. package/assets/templates/realistic_movie.json +28 -0
  9. package/assets/templates/shorts.json +18 -0
  10. package/lib/actions/audio.d.ts +2 -1
  11. package/lib/actions/audio.js +8 -3
  12. package/lib/actions/captions.js +2 -2
  13. package/lib/actions/images.d.ts +2 -1
  14. package/lib/actions/images.js +68 -32
  15. package/lib/actions/movie.js +10 -6
  16. package/lib/actions/translate.d.ts +2 -1
  17. package/lib/actions/translate.js +8 -3
  18. package/lib/agents/combine_audio_files_agent.js +4 -0
  19. package/lib/agents/image_google_agent.d.ts +4 -1
  20. package/lib/agents/image_google_agent.js +3 -2
  21. package/lib/agents/image_openai_agent.d.ts +5 -3
  22. package/lib/agents/image_openai_agent.js +29 -4
  23. package/lib/agents/movie_google_agent.d.ts +24 -0
  24. package/lib/agents/movie_google_agent.js +122 -0
  25. package/lib/cli/bin.js +12 -0
  26. package/lib/index.d.ts +5 -0
  27. package/lib/index.js +5 -0
  28. package/lib/methods/mulmo_script.d.ts +0 -1
  29. package/lib/methods/mulmo_script.js +0 -5
  30. package/lib/methods/mulmo_studio.d.ts +1 -1
  31. package/lib/tools/create_mulmo_script_from_url.js +2 -2
  32. package/lib/tools/create_mulmo_script_interactively.js +2 -2
  33. package/lib/tools/story_to_script.js +2 -2
  34. package/lib/types/index.d.ts +1 -0
  35. package/lib/types/index.js +1 -0
  36. package/lib/types/schema.d.ts +155 -54
  37. package/lib/types/schema.js +14 -2
  38. package/lib/types/type.d.ts +3 -1
  39. package/lib/utils/file.d.ts +1 -0
  40. package/lib/utils/file.js +12 -8
  41. package/lib/utils/image_plugins/image.d.ts +1 -1
  42. package/lib/utils/image_plugins/movie.d.ts +1 -1
  43. package/lib/utils/preprocess.d.ts +9 -3
  44. package/lib/utils/utils.d.ts +1 -0
  45. package/lib/utils/utils.js +3 -0
  46. package/package.json +8 -8
  47. package/scripts/templates/movie_prompts_template.json +50 -0
  48. package/scripts/templates/shorts_template.json +52 -0
package/README.md CHANGED
@@ -90,11 +90,28 @@ Create a `.env` file in your project directory with the following API keys:
  ```bash
  OPENAI_API_KEY=your_openai_api_key
  ```
- ### Optional
+
+ #### (Optional) For the advanced image generation model
  ```bash
  DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
- GOOGLE_PROJECT_ID=your_google_project_id # for Google's image generation model
- NIJIVOICE_API_KEY=your_nijivoice_api_key # for Nijivoice's TTS model
+ ```
+
+ #### (Optional) For Google's image generation model
+ ```bash
+ GOOGLE_PROJECT_ID=your_google_project_id
+ ```
+
+ You may also need to take the following steps before running any commands:
+ 1. Install [gcloud CLI](https://cloud.google.com/sdk/docs/install)
+ 2. Login by `gcloud auth application-default login`
+
+ #### (Optional) For Nijivoice's TTS model
+ ```bash
+ NIJIVOICE_API_KEY=your_nijivoice_api_key
+ ```
+
+ #### (Optional) to access web in mulmo tool
+ ```bash
  BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
  ```

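All of these keys are read through `dotenv` when the CLI starts (note the `import "dotenv/config"` lines later in this diff). As a minimal sketch — not code from the package — a preflight check mirroring the required/optional split documented above could look like this:

```typescript
// Sketch only: key names come from the README above; the behavior is illustrative.
import "dotenv/config";

const optionalKeys = [
  "DEFAULT_OPENAI_IMAGE_MODEL", // advanced OpenAI image generation model
  "GOOGLE_PROJECT_ID",          // Google's image generation model
  "NIJIVOICE_API_KEY",          // Nijivoice's TTS model
  "BROWSERLESS_API_TOKEN",      // web access in the mulmo tool
];

if (!process.env.OPENAI_API_KEY) {
  throw new Error("OPENAI_API_KEY is required");
}
for (const key of optionalKeys) {
  if (!process.env[key]) {
    console.warn(`${key} is not set; the corresponding optional feature is unavailable`);
  }
}
```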
package/assets/templates/akira_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "Akira style",
+ "description": "Template for Akira style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/drslump_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "Dr. Slump Style",
+ "description": "Template for Dr. Slump style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/ghibli_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "American Comic Strips",
- "description": "Template for Dilbert-style comic strips.",
+ "title": "Ghibli comic style",
+ "description": "Template for Ghibli-style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/ghost_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "Ghost in the shell style",
+ "description": "Template for Ghost in the shell style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/onepiece_comic.json CHANGED
@@ -1,6 +1,6 @@
  {
- "title": "Dr. Slump Style Comic Strips",
- "description": "Template for Dr. Slump-style comic.",
+ "title": "One Piece style",
+ "description": "Template for One Piece style comic presentation.",
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
  "$mulmocast": {
package/assets/templates/portrait_movie.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "title": "Photo realistic movie (portrait)",
+ "description": "Template for photo realistic movie in portrait mode.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "presentationStyle": {
+ "$mulmocast": {
+ "version": "1.0",
+ "credit": "closing"
+ },
+ "canvasSize": {
+ "width": 1024,
+ "height": 1536
+ },
+ "imageParams": {
+ "style": "<style>Photo realistic, cinematic.</style>",
+ "images": {
+ "presenter": {
+ "type": "image",
+ "source": {
+ "kind": "url",
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
+ }
+ }
+ }
+ }
+ },
+ "scriptName": "movie_prompts_template.json"
+ }
package/assets/templates/realistic_movie.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "title": "Photo realistic movie template",
+ "description": "Template for photo realistic movie.",
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "presentationStyle": {
+ "$mulmocast": {
+ "version": "1.0",
+ "credit": "closing"
+ },
+ "canvasSize": {
+ "width": 1536,
+ "height": 1024
+ },
+ "imageParams": {
+ "style": "<style>Photo realistic, cinematic.</style>",
+ "images": {
+ "presenter": {
+ "type": "image",
+ "source": {
+ "kind": "url",
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
+ }
+ }
+ }
+ }
+ },
+ "scriptName": "movie_prompts_template.json"
+ }
package/assets/templates/shorts.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "title": "Short movie template",
+ "description": "Template for Youtube shorts.",
+ "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+ "presentationStyle": {
+ "$mulmocast": {
+ "version": "1.0"
+ },
+ "canvasSize": {
+ "width": 720,
+ "height": 1280
+ },
+ "imageParams": {
+ "style": "<style>Photo realistic, cinematic.</style>"
+ }
+ },
+ "scriptName": "movie_prompts_template.json"
+ }
package/lib/actions/audio.d.ts CHANGED
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const audio: (context: MulmoStudioContext) => Promise<void>;
+ export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
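The `audio`, `images`, and `translate` actions all gain this optional `callbacks` parameter in 0.0.11; each forwards the array to `graph.registerCallback` before running its GraphAI graph. A hedged usage sketch — the payload of graphai's `CallbackFunction` is not shown in this diff, so it is treated as opaque, and the root re-export of `audio` is assumed:

```typescript
// Hypothetical caller code: observe graph progress via the new parameter.
import type { CallbackFunction } from "graphai";
import { audio, type MulmoStudioContext } from "mulmocast"; // assumed root re-exports

declare const context: MulmoStudioContext; // built elsewhere (CLI or tools)

const logProgress: CallbackFunction = (...args) => {
  console.log("graph event:", ...args); // payload shape is defined by graphai
};

await audio(context, [logProgress]);
```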
package/lib/actions/audio.js CHANGED
@@ -1,6 +1,6 @@
  import "dotenv/config";
  import { GraphAI } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
  import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
  const provider_to_agent = {
@@ -160,7 +160,7 @@ const agentFilters = [
  nodeIds: ["tts"],
  },
  ];
- export const audio = async (context) => {
+ export const audio = async (context, callbacks) => {
  try {
  MulmoStudioMethods.setSessionState(context.studio, "audio", true);
  const { studio, fileDirs, lang } = context;
@@ -187,6 +187,11 @@ export const audio = async (context) => {
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
  graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
  graph.injectValue("audioDirPath", audioDirPath);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  await graph.run();
  writingMessage(audioCombinedFilePath);
  }
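The recurring `const vanillaAgents = agents.default ?? agents;` line (repeated in `captions.js`, `images.js`, and `translate.js` below) replaces the old default import of `@graphai/vanilla`. It is a standard ESM/CJS interop shim: the agent map is picked up whether the module surfaces it as a default export or directly as its namespace. A minimal sketch of the pattern:

```typescript
// Interop shim: tolerate both module shapes of @graphai/vanilla.
import * as agents from "@graphai/vanilla";

// If the bundler/transpiler wraps the agents in a default export, unwrap it;
// otherwise the namespace object itself is the agent map.
const vanillaAgents = (agents as { default?: typeof agents }).default ?? agents;

// The resolved map is then spread into the GraphAI agent registry:
//   new GraphAI(graph_data, { ...vanillaAgents, ttsNijivoiceAgent, ... }, options);
```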
package/lib/actions/captions.js CHANGED
@@ -1,9 +1,9 @@
  import { GraphAI, GraphAILogger } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  const graph_data = {
  version: 0.5,
  nodes: {
package/lib/actions/images.d.ts CHANGED
@@ -1,2 +1,3 @@
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const images: (context: MulmoStudioContext) => Promise<void>;
+ export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -1,16 +1,17 @@
  import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import imageGoogleAgent from "../agents/image_google_agent.js";
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
+ import movieGoogleAgent from "../agents/movie_google_agent.js";
  import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { imagePlugins } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
@@ -26,8 +27,8 @@ const imagePreprocessAgent = async (namedInputs) => {
  const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
  const returnValue = {
- aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
  imageParams,
+ movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
  };
  if (beat.image) {
  const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
@@ -37,20 +38,24 @@ const imagePreprocessAgent = async (namedInputs) => {
  const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
  const path = await plugin.process(processorParams);
  // undefined prompt indicates that image generation is not needed
- return { path, ...returnValue };
+ return { imagePath: path, ...returnValue };
  }
  finally {
  MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
  }
  }
- const prompt = imagePrompt(beat, imageParams.style);
+ // images for "edit_image"
  const images = (() => {
  const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
  const sources = imageNames.map((name) => imageRefs[name]);
  return sources.filter((source) => source !== undefined);
  })();
- return { path: imagePath, prompt, ...returnValue, images };
+ if (beat.moviePrompt && !beat.imagePrompt) {
+ return { ...returnValue, images }; // no image prompt, only movie prompt
+ }
+ const prompt = imagePrompt(beat, imageParams.style);
+ return { imagePath, prompt, ...returnValue, images };
  };
  const graph_data = {
  version: 0.5,
@@ -95,18 +100,36 @@ const graph_data = {
  retry: 3,
  inputs: {
  prompt: ":preprocessor.prompt",
- file: ":preprocessor.path", // only for fileCacheAgentFilter
+ images: ":preprocessor.images",
+ file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
  text: ":preprocessor.prompt", // only for fileCacheAgentFilter
- force: ":context.force",
- studio: ":context.studio", // for cache
- index: ":__mapIndex", // for cache
- sessionType: "image", // for cache
+ force: ":context.force", // only for fileCacheAgentFilter
+ studio: ":context.studio", // for fileCacheAgentFilter
+ index: ":__mapIndex", // for fileCacheAgentFilter
+ sessionType: "image", // for fileCacheAgentFilter
  params: {
  model: ":preprocessor.imageParams.model",
- size: ":preprocessor.imageParams.size",
  moderation: ":preprocessor.imageParams.moderation",
- aspectRatio: ":preprocessor.aspectRatio",
- images: ":preprocessor.images",
+ canvasSize: ":context.studio.script.canvasSize",
+ },
+ },
+ defaultValue: {},
+ },
+ movieGenerator: {
+ if: ":preprocessor.movieFile",
+ agent: "movieGoogleAgent",
+ inputs: {
+ onComplete: ":imageGenerator", // to wait for imageGenerator to finish
+ prompt: ":beat.moviePrompt",
+ imagePath: ":preprocessor.imagePath",
+ file: ":preprocessor.movieFile",
+ studio: ":context.studio", // for cache
+ index: ":__mapIndex", // for cache
+ sessionType: "movie", // for cache
+ params: {
+ model: ":context.studio.script.movieParams.model",
+ duration: ":beat.duration",
+ canvasSize: ":context.studio.script.canvasSize",
  },
  },
  defaultValue: {},
@@ -114,11 +137,9 @@ const graph_data = {
  output: {
  agent: "copyAgent",
  inputs: {
- result: ":imageGenerator",
- image: ":preprocessor.path",
- },
- output: {
- imageFile: ".image",
+ onComplete: ":movieGenerator",
+ imageFile: ":preprocessor.imagePath",
+ movieFile: ":preprocessor.movieFile",
  },
  isResult: true,
  },
@@ -141,7 +162,7 @@ const graph_data = {
  context: ":context",
  },
  },
- writeOutout: {
+ writeOutput: {
  // console: { before: true },
  agent: "fileWriteAgent",
  inputs: {
@@ -152,14 +173,20 @@ const graph_data = {
  },
  };
  const googleAuth = async () => {
- const auth = new GoogleAuth({
- scopes: ["https://www.googleapis.com/auth/cloud-platform"],
- });
- const client = await auth.getClient();
- const accessToken = await client.getAccessToken();
- return accessToken.token;
+ try {
+ const auth = new GoogleAuth({
+ scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+ });
+ const client = await auth.getClient();
+ const accessToken = await client.getAccessToken();
+ return accessToken.token;
+ }
+ catch (__error) {
+ GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
+ process.exit(1);
+ }
  };
- const generateImages = async (context) => {
+ const generateImages = async (context, callbacks) => {
  const { studio, fileDirs } = context;
  const { outDirPath, imageDirPath } = fileDirs;
  mkdir(`${imageDirPath}/${studio.filename}`);
@@ -167,7 +194,7 @@ const generateImages = async (context) => {
  {
  name: "fileCacheAgentFilter",
  agent: fileCacheAgentFilter,
- nodeIds: ["imageGenerator"],
+ nodeIds: ["imageGenerator", "movieGenerator"],
  },
  ];
  const options = {
@@ -175,7 +202,7 @@ const generateImages = async (context) => {
  };
  const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
  // We need to get google's auth token only if the google is the text2image provider.
- if (imageAgentInfo.provider === "google") {
+ if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
  GraphAILogger.log("google was specified as text2image engine");
  const token = await googleAuth();
  options.config = {
@@ -183,6 +210,10 @@ const generateImages = async (context) => {
  projectId: process.env.GOOGLE_PROJECT_ID,
  token,
  },
+ movieGoogleAgent: {
+ projectId: process.env.GOOGLE_PROJECT_ID,
+ token,
+ },
  };
  }
  if (imageAgentInfo.provider === "openai") {
@@ -219,16 +250,21 @@ const generateImages = async (context) => {
  imageDirPath,
  imageRefs,
  };
- const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
+ const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  await graph.run();
  };
- export const images = async (context) => {
+ export const images = async (context, callbacks) => {
  try {
  MulmoStudioMethods.setSessionState(context.studio, "image", true);
- await generateImages(context);
+ await generateImages(context, callbacks);
  }
  finally {
  MulmoStudioMethods.setSessionState(context.studio, "image", false);
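Two structural changes stand out in this file: the preprocessor now branches on `beat.moviePrompt` (a beat with only a movie prompt skips image generation entirely), and the new `movieGenerator` node runs after `imageGenerator`, serialized through its `onComplete` input. A condensed sketch of the per-beat planning, with names simplified relative to the diff:

```typescript
// Condensed sketch of the branching added to imagePreprocessAgent.
type Beat = { imagePrompt?: string; moviePrompt?: string };

function planBeat(beat: Beat, dir: string, filename: string, index: number) {
  const imagePath = `${dir}/${filename}/${index}.png`;
  // A .mov output is planned only when the beat carries a movie prompt.
  const movieFile = beat.moviePrompt ? `${dir}/${filename}/${index}.mov` : undefined;
  if (beat.moviePrompt && !beat.imagePrompt) {
    return { movieFile }; // movie-only beat: no image prompt, no image generation
  }
  return { imagePath, movieFile }; // image is generated; a movie may animate it
}
```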
package/lib/actions/movie.js CHANGED
@@ -61,8 +61,8 @@ const getOutputOption = (audioId) => {
  const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
  const start = performance.now();
  const ffmpegContext = FfmpegContextInit();
- if (studio.beats.some((beat) => !beat.imageFile)) {
- GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
+ if (studio.beats.some((beat) => !beat.imageFile && !beat.movieFile)) {
+ GraphAILogger.info("beat.imageFile or beat.movieFile is not set. Please run `yarn run images ${file}` ");
  return;
  }
  const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
@@ -71,11 +71,15 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  const filterComplexAudioIds = [];
  studio.beats.reduce((timestamp, studioBeat, index) => {
  const beat = studio.script.beats[index];
- if (!studioBeat.imageFile || !studioBeat.duration) {
- throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
+ const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
+ if (!sourceFile) {
+ throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
  }
- const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.imageFile);
- const mediaType = MulmoScriptMethods.getImageType(studio.script, beat);
+ if (!studioBeat.duration) {
+ throw new Error(`studioBeat.duration is not set: index=${index}`);
+ }
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
+ const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
  const extraPadding = (() => {
  // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
  if (index === 0) {
package/lib/actions/translate.d.ts CHANGED
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const translate: (context: MulmoStudioContext) => Promise<void>;
+ export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/translate.js CHANGED
@@ -1,13 +1,13 @@
  import "dotenv/config";
  import { GraphAI, assert } from "graphai";
- import vanillaAgents from "@graphai/vanilla";
+ import * as agents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- // const { default: __, ...vanillaAgents } = agents;
+ const vanillaAgents = agents.default ?? agents;
  const translateGraph = {
  version: 0.5,
  nodes: {
@@ -208,7 +208,7 @@ const agentFilters = [
  ];
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
- export const translate = async (context) => {
+ export const translate = async (context, callbacks) => {
  try {
  MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
  const { studio, fileDirs } = context;
@@ -222,6 +222,11 @@ export const translate = async (context) => {
  graph.injectValue("targetLangs", targetLangs);
  graph.injectValue("outDirPath", outDirPath);
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  const results = await graph.run();
  writingMessage(outputStudioFilePath);
  if (results.mergeStudioResult) {
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -26,11 +26,15 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  const totalPadding = await (async () => {
  if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
  const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+ // NOTE: We respect the duration of the movie, only if the movie is specified as a madia source, NOT generated.
  const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
  if (movieDuration > audioDuration) {
  return padding + (movieDuration - audioDuration);
  }
  }
+ else if (beat.duration && beat.duration > audioDuration) {
+ return padding + (beat.duration - audioDuration);
+ }
  return padding;
  })();
  studioBeat.duration = audioDuration + totalPadding;
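The new `else if` adds a second way for a beat to outlast its narration: previously only a movie supplied as a media source could stretch the beat, now an explicit `beat.duration` can as well (generated movies still do not). A condensed sketch of the resulting rule:

```typescript
// Sketch of the padding rule after this change.
function beatPadding(padding: number, audioDuration: number,
                     sourceMovieDuration?: number, beatDuration?: number): number {
  if (sourceMovieDuration !== undefined) {
    // a movie given as a url/path media source may stretch the beat
    return sourceMovieDuration > audioDuration
      ? padding + (sourceMovieDuration - audioDuration)
      : padding;
  }
  if (beatDuration !== undefined && beatDuration > audioDuration) {
    return padding + (beatDuration - audioDuration); // explicit beat duration
  }
  return padding;
}
// studioBeat.duration = audioDuration + beatPadding(...)
```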
package/lib/agents/image_google_agent.d.ts CHANGED
@@ -5,7 +5,10 @@ export type ImageGoogleConfig = {
  };
  export declare const imageGoogleAgent: AgentFunction<{
  model: string;
- aspectRatio: string;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  }, {
  buffer: Buffer;
  }, {
package/lib/agents/image_google_agent.js CHANGED
@@ -1,4 +1,5 @@
  import { GraphAILogger } from "graphai";
+ import { getAspectRatio } from "./movie_google_agent.js";
  async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
  try {
@@ -50,9 +51,9 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  throw error;
  }
  }
- export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
+ export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
  const { prompt } = namedInputs;
- const aspectRatio = params.aspectRatio ?? "16:9";
+ const aspectRatio = getAspectRatio(params.canvasSize);
  const model = params.model ?? "imagen-3.0-fast-generate-001";
  //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
  const projectId = config?.projectId;
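`getAspectRatio` is now imported from the new `movie_google_agent.js`, whose 122-line body is not included in this excerpt. Judging from the old `params.aspectRatio ?? "16:9"` default and the canvas sizes used by the new templates, a plausible — purely hypothetical — shape of the helper is:

```typescript
// Hypothetical sketch: the real implementation lives in movie_google_agent.js
// and is not shown in this diff. This mirrors how the templates' canvas sizes
// (1536x1024, 1024x1536, 720x1280) would map to ratio strings.
export const getAspectRatio = (canvasSize: { width: number; height: number }): string => {
  if (canvasSize.width > canvasSize.height) return "16:9"; // landscape
  if (canvasSize.width < canvasSize.height) return "9:16"; // portrait
  return "1:1"; // square
};
```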
package/lib/agents/image_openai_agent.d.ts CHANGED
@@ -1,16 +1,18 @@
  import { AgentFunction, AgentFunctionInfo } from "graphai";
- type OpenAIImageSize = "1792x1024" | "auto" | "1024x1024" | "1536x1024" | "1024x1536" | "256x256";
  type OpenAIModeration = "low" | "auto";
  export declare const imageOpenaiAgent: AgentFunction<{
  apiKey: string;
  model: string;
- size: OpenAIImageSize | null | undefined;
  moderation: OpenAIModeration | null | undefined;
- images: string[] | null | undefined;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  }, {
  buffer: Buffer;
  }, {
  prompt: string;
+ images: string[] | null | undefined;
  }>;
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
  export default imageOpenaiAgentInfo;
package/lib/agents/image_openai_agent.js CHANGED
@@ -2,14 +2,39 @@ import fs from "fs";
  import OpenAI, { toFile } from "openai";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params }) => {
- const { prompt } = namedInputs;
- const { apiKey, model, size, moderation, images } = params;
+ const { prompt, images } = namedInputs;
+ const { apiKey, moderation, canvasSize } = params;
+ const model = params.model ?? "dall-e-3";
  const openai = new OpenAI({ apiKey });
+ const size = (() => {
+ if (model === "gpt-image-1") {
+ if (canvasSize.width > canvasSize.height) {
+ return "1536x1024";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "1024x1536";
+ }
+ else {
+ return "1024x1024";
+ }
+ }
+ else {
+ if (canvasSize.width > canvasSize.height) {
+ return "1792x1024";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "1024x1792";
+ }
+ else {
+ return "1024x1024";
+ }
+ }
+ })();
  const imageOptions = {
- model: model ?? "dall-e-3",
+ model,
  prompt,
  n: 1,
- size: size || model === "gpt-image-1" ? "1536x1024" : "1792x1024",
+ size,
  };
  if (model === "gpt-image-1") {
  imageOptions.moderation = moderation || "auto";
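The `size` IIFE above also fixes a precedence bug in the old one-liner: `size: size || model === "gpt-image-1" ? "1536x1024" : "1792x1024"` parses as `(size || model === "gpt-image-1") ? ... : ...`, so a caller-supplied size was never actually used. The new logic derives the size from canvas orientation per model; extracted here as a standalone sketch:

```typescript
// Sketch of the size selection above: gpt-image-1 and dall-e-3 accept
// different resolution sets, chosen by canvas orientation.
type CanvasSize = { width: number; height: number };

function openaiImageSize(model: string, canvas: CanvasSize): string {
  const landscape = canvas.width > canvas.height;
  const portrait = canvas.width < canvas.height;
  if (model === "gpt-image-1") {
    return landscape ? "1536x1024" : portrait ? "1024x1536" : "1024x1024";
  }
  // dall-e-3 (the default model) uses the 1792-based sizes
  return landscape ? "1792x1024" : portrait ? "1024x1792" : "1024x1024";
}

// e.g. the shorts template canvas (720x1280) yields "1024x1536" under gpt-image-1.
```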