mulmocast 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/assets/templates/akira_comic.json +28 -0
  2. package/assets/templates/children_book.json +13 -0
  3. package/assets/templates/comic_strips.json +14 -1
  4. package/assets/templates/drslump_comic.json +28 -0
  5. package/assets/templates/ghibli_comic.json +28 -0
  6. package/assets/templates/ghost_comic.json +35 -0
  7. package/assets/templates/onepiece_comic.json +28 -0
  8. package/assets/templates/portrait_movie.json +28 -0
  9. package/assets/templates/realistic_movie.json +28 -0
  10. package/assets/templates/sensei_and_taro.json +21 -0
  11. package/lib/actions/audio.js +1 -1
  12. package/lib/actions/captions.js +1 -1
  13. package/lib/actions/images.js +98 -13
  14. package/lib/actions/movie.d.ts +1 -1
  15. package/lib/actions/movie.js +13 -11
  16. package/lib/actions/pdf.js +6 -4
  17. package/lib/actions/translate.js +1 -1
  18. package/lib/agents/image_openai_agent.d.ts +1 -0
  19. package/lib/agents/image_openai_agent.js +16 -4
  20. package/lib/agents/movie_google_agent.d.ts +17 -0
  21. package/lib/agents/movie_google_agent.js +114 -0
  22. package/lib/cli/bin.js +19 -0
  23. package/lib/cli/helpers.js +2 -1
  24. package/lib/methods/mulmo_studio.d.ts +1 -1
  25. package/lib/tools/create_mulmo_script_from_url.js +1 -1
  26. package/lib/tools/create_mulmo_script_interactively.js +1 -1
  27. package/lib/tools/story_to_script.js +1 -1
  28. package/lib/types/schema.d.ts +1966 -322
  29. package/lib/types/schema.js +21 -3
  30. package/lib/types/type.d.ts +3 -1
  31. package/lib/utils/file.js +20 -9
  32. package/lib/utils/pdf.d.ts +1 -0
  33. package/lib/utils/pdf.js +5 -3
  34. package/lib/utils/preprocess.d.ts +57 -16
  35. package/lib/utils/utils.d.ts +1 -0
  36. package/lib/utils/utils.js +3 -0
  37. package/package.json +9 -9
  38. package/scripts/templates/children_book.json +0 -7
  39. package/scripts/templates/image_prompts_template.json +41 -0
  40. package/scripts/templates/movie_prompts_template.json +50 -0
  41. package/scripts/templates/sensei_and_taro.json +0 -11
  42. package/scripts/templates/text_only_template.json +35 -0
  43. package/assets/templates/ghibli_strips.json +0 -6
  44. package/scripts/templates/comic_strips.json +0 -30
  45. package/scripts/templates/ghibli_strips.json +0 -30
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Akira style",
3
+ "description": "Template for Akira style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>AKIRA aesthetic.</style>",
16
+ "images": {
17
+ "girl": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/akira_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -2,5 +2,18 @@
2
2
  "title": "Children Book",
3
3
  "description": "Template for children book.",
4
4
+ "systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must have an image prompt appropriate for the text.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
16
+ }
17
+ },
5
18
  "scriptName": "children_book.json"
6
19
  }
@@ -2,5 +2,18 @@
2
2
  "title": "American Comic Strips",
3
3
  "description": "Template for Dilbert-style comic strips.",
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
- "scriptName": "comic_strips.json"
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
16
+ }
17
+ },
18
+ "scriptName": "text_only_template.json"
6
19
  }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Dr. Slump Style",
3
+ "description": "Template for Dr. Slump style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
16
+ "images": {
17
+ "girl": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/slump_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Ghibli comic style",
3
+ "description": "Template for Ghibli-style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Ghibli style</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "title": "Ghost in the shell style",
3
+ "description": "Template for Ghost in the shell style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Ghost in the shell aesthetic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"
22
+ }
23
+ },
24
+ "optimus": {
25
+ "type": "image",
26
+ "source": {
27
+ "kind": "url",
28
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
29
+ }
30
+ }
31
+ }
32
+ }
33
+ },
34
+ "scriptName": "image_prompts_template.json"
35
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "One Piece style",
3
+ "description": "Template for One Piece style comic presentation.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>One Piece aesthetic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/onepiece_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "image_prompts_template.json"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Photo realistic movie (portrait)",
3
+ "description": "Template for photo realistic movie in portrait mode.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1024,
12
+ "height": 1536
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Photo realistic, cinematic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "movie_prompts_template.json"
28
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "title": "Photo realistic movie template",
3
+ "description": "Template for photo realistic movie.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Photo realistic, cinematic.</style>",
16
+ "images": {
17
+ "presenter": {
18
+ "type": "image",
19
+ "source": {
20
+ "kind": "url",
21
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
22
+ }
23
+ }
24
+ }
25
+ }
26
+ },
27
+ "scriptName": "movie_prompts_template.json"
28
+ }
@@ -2,5 +2,26 @@
2
2
  "title": "Student and Teacher",
3
3
  "description": "Interactive discussion between a student and teacher",
4
4
  "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "canvasSize": {
11
+ "width": 1536,
12
+ "height": 1024
13
+ },
14
+ "imageParams": {
15
+ "style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
16
+ },
17
+ "speechParams": {
18
+ "provider": "nijivoice",
19
+ "speakers": {
20
+ "Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62" },
21
+ "Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
22
+ "Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
23
+ }
24
+ }
25
+ },
5
26
  "scriptName": "sensei_and_taro.json"
6
27
  }
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
12
12
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
13
13
  import { text2hash, localizedText } from "../utils/utils.js";
14
14
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
15
- const { default: __, ...vanillaAgents } = agents;
15
+ const vanillaAgents = agents.default ?? agents;
16
16
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
17
17
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
18
18
  const provider_to_agent = {
@@ -3,7 +3,7 @@ import * as agents from "@graphai/vanilla";
3
3
  import { getHTMLFile } from "../utils/file.js";
4
4
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
5
5
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
6
- const { default: __, ...vanillaAgents } = agents;
6
+ const vanillaAgents = agents.default ?? agents;
7
7
  const graph_data = {
8
8
  version: 0.5,
9
9
  nodes: {
@@ -1,4 +1,5 @@
1
1
  import dotenv from "dotenv";
2
+ import fs from "fs";
2
3
  import { GraphAI, GraphAILogger } from "graphai";
3
4
  import * as agents from "@graphai/vanilla";
4
5
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
@@ -6,10 +7,11 @@ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
6
7
  import { fileCacheAgentFilter } from "../utils/filters.js";
7
8
  import imageGoogleAgent from "../agents/image_google_agent.js";
8
9
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
9
- import { MulmoScriptMethods } from "../methods/index.js";
10
+ import movieGoogleAgent from "../agents/movie_google_agent.js";
11
+ import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
10
12
  import { imagePlugins } from "../utils/image_plugins/index.js";
11
13
  import { imagePrompt } from "../utils/prompt.js";
12
- const { default: __, ...vanillaAgents } = agents;
14
+ const vanillaAgents = agents.default ?? agents;
13
15
  dotenv.config();
14
16
  // const openai = new OpenAI();
15
17
  import { GoogleAuth } from "google-auth-library";
@@ -21,8 +23,12 @@ const htmlStyle = (script, beat) => {
21
23
  };
22
24
  };
23
25
  const imagePreprocessAgent = async (namedInputs) => {
24
- const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
26
+ const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
25
27
  const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
28
+ if (!imageParams.size) {
29
+ const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
30
+ imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
31
+ }
26
32
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
27
33
  const returnValue = {
28
34
  aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
@@ -44,7 +50,12 @@ const imagePreprocessAgent = async (namedInputs) => {
44
50
  }
45
51
  }
46
52
  const prompt = imagePrompt(beat, imageParams.style);
47
- return { path: imagePath, prompt, ...returnValue };
53
+ const images = (() => {
54
+ const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
55
+ const sources = imageNames.map((name) => imageRefs[name]);
56
+ return sources.filter((source) => source !== undefined);
57
+ })();
58
+ return { path: imagePath, prompt, ...returnValue, images };
48
59
  };
49
60
  const graph_data = {
50
61
  version: 0.5,
@@ -54,9 +65,16 @@ const graph_data = {
54
65
  imageDirPath: {},
55
66
  imageAgentInfo: {},
56
67
  outputStudioFilePath: {},
68
+ imageRefs: {},
57
69
  map: {
58
70
  agent: "mapAgent",
59
- inputs: { rows: ":context.studio.script.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
71
+ inputs: {
72
+ rows: ":context.studio.script.beats",
73
+ context: ":context",
74
+ imageAgentInfo: ":imageAgentInfo",
75
+ imageDirPath: ":imageDirPath",
76
+ imageRefs: ":imageRefs",
77
+ },
60
78
  isResult: true,
61
79
  params: {
62
80
  rowKey: "beat",
@@ -73,6 +91,7 @@ const graph_data = {
73
91
  suffix: "p",
74
92
  imageDirPath: ":imageDirPath",
75
93
  imageAgentInfo: ":imageAgentInfo",
94
+ imageRefs: ":imageRefs",
76
95
  },
77
96
  },
78
97
  imageGenerator: {
@@ -92,6 +111,43 @@ const graph_data = {
92
111
  size: ":preprocessor.imageParams.size",
93
112
  moderation: ":preprocessor.imageParams.moderation",
94
113
  aspectRatio: ":preprocessor.aspectRatio",
114
+ images: ":preprocessor.images",
115
+ },
116
+ },
117
+ defaultValue: {},
118
+ },
119
+ prepareMovie: {
120
+ agent: (namedInputs) => {
121
+ const { beat, imageDirPath, index, context } = namedInputs;
122
+ if (beat.moviePrompt) {
123
+ const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
124
+ return { movieFile };
125
+ }
126
+ return {};
127
+ },
128
+ inputs: {
129
+ result: ":imageGenerator", // to wait for imageGenerator to finish
130
+ imagePath: ":preprocessor.path",
131
+ beat: ":beat",
132
+ imageDirPath: ":imageDirPath",
133
+ index: ":__mapIndex",
134
+ context: ":context",
135
+ },
136
+ },
137
+ movieGenerator: {
138
+ if: ":prepareMovie.movieFile",
139
+ agent: "movieGoogleAgent",
140
+ inputs: {
141
+ prompt: ":beat.moviePrompt",
142
+ imagePath: ":preprocessor.path",
143
+ file: ":prepareMovie.movieFile",
144
+ studio: ":context.studio", // for cache
145
+ index: ":__mapIndex", // for cache
146
+ sessionType: "movie", // for cache
147
+ params: {
148
+ model: ":context.studio.script.movieParams.model",
149
+ aspectRatio: ":preprocessor.aspectRatio",
150
+ duration: ":beat.duration",
95
151
  },
96
152
  },
97
153
  defaultValue: {},
@@ -99,11 +155,9 @@ const graph_data = {
99
155
  output: {
100
156
  agent: "copyAgent",
101
157
  inputs: {
102
- result: ":imageGenerator",
103
- image: ":preprocessor.path",
104
- },
105
- output: {
106
- imageFile: ".image",
158
+ onComplete: ":movieGenerator",
159
+ imageFile: ":preprocessor.path",
160
+ movieFile: ":prepareMovie.movieFile",
107
161
  },
108
162
  isResult: true,
109
163
  },
@@ -152,7 +206,7 @@ const generateImages = async (context) => {
152
206
  {
153
207
  name: "fileCacheAgentFilter",
154
208
  agent: fileCacheAgentFilter,
155
- nodeIds: ["imageGenerator"],
209
+ nodeIds: ["imageGenerator", "movieGenerator"],
156
210
  },
157
211
  ];
158
212
  const options = {
@@ -160,7 +214,7 @@ const generateImages = async (context) => {
160
214
  };
161
215
  const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
162
216
  // We need to get google's auth token only if the google is the text2image provider.
163
- if (imageAgentInfo.provider === "google") {
217
+ if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
164
218
  GraphAILogger.log("google was specified as text2image engine");
165
219
  const token = await googleAuth();
166
220
  options.config = {
@@ -168,16 +222,47 @@ const generateImages = async (context) => {
168
222
  projectId: process.env.GOOGLE_PROJECT_ID,
169
223
  token,
170
224
  },
225
+ movieGoogleAgent: {
226
+ projectId: process.env.GOOGLE_PROJECT_ID,
227
+ token,
228
+ },
171
229
  };
172
230
  }
231
+ if (imageAgentInfo.provider === "openai") {
232
+ // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
233
+ // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
234
+ // gpt-image-1:3,000,000 TPM、150 images per minute
235
+ graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
236
+ }
237
+ const imageRefs = {};
238
+ const images = studio.script.imageParams?.images;
239
+ if (images) {
240
+ await Promise.all(Object.keys(images).map(async (key) => {
241
+ const image = images[key];
242
+ if (image.source.kind === "path") {
243
+ imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
244
+ }
245
+ else if (image.source.kind === "url") {
246
+ const response = await fetch(image.source.url);
247
+ if (!response.ok) {
248
+ throw new Error(`Failed to download image: ${image.source.url}`);
249
+ }
250
+ const buffer = Buffer.from(await response.arrayBuffer());
251
+ const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
252
+ await fs.promises.writeFile(imagePath, buffer);
253
+ imageRefs[key] = imagePath;
254
+ }
255
+ }));
256
+ }
173
257
  GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
174
258
  const injections = {
175
259
  context,
176
260
  imageAgentInfo,
177
261
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
178
262
  imageDirPath,
263
+ imageRefs,
179
264
  };
180
- const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
265
+ const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
181
266
  Object.keys(injections).forEach((key) => {
182
267
  graph.injectValue(key, injections[key]);
183
268
  });
@@ -3,7 +3,7 @@ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType
3
3
  videoId: string;
4
4
  videoPart: string;
5
5
  };
6
- export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
6
+ export declare const getAudioPart: (inputIndex: number, duration: number, delay: number, mixAudio: number) => {
7
7
  audioId: string;
8
8
  audioPart: string;
9
9
  };
@@ -26,13 +26,14 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
26
26
  videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
27
27
  };
28
28
  };
29
- export const getAudioPart = (inputIndex, duration, delay) => {
29
+ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
30
30
  const audioId = `a${inputIndex}`;
31
31
  return {
32
32
  audioId,
33
33
  audioPart: `[${inputIndex}:a]` +
34
34
  `atrim=duration=${duration},` + // Trim to beat duration
35
35
  `adelay=${delay * 1000}|${delay * 1000},` +
36
+ `volume=${mixAudio},` + // 👈 add this line
36
37
  `aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
37
38
  `[${audioId}]`,
38
39
  };
@@ -68,12 +69,13 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
68
69
  // Add each image input
69
70
  const filterComplexVideoIds = [];
70
71
  const filterComplexAudioIds = [];
71
- studio.beats.reduce((timestamp, beat, index) => {
72
- if (!beat.imageFile || !beat.duration) {
73
- throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
72
+ studio.beats.reduce((timestamp, studioBeat, index) => {
73
+ const beat = studio.script.beats[index];
74
+ if (!studioBeat.imageFile || !studioBeat.duration) {
75
+ throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
74
76
  }
75
- const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
76
- const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
77
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.movieFile ?? studioBeat.imageFile);
78
+ const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
77
79
  const extraPadding = (() => {
78
80
  // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
79
81
  if (index === 0) {
@@ -84,11 +86,11 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
84
86
  }
85
87
  return 0;
86
88
  })();
87
- const duration = beat.duration + extraPadding;
89
+ const duration = studioBeat.duration + extraPadding;
88
90
  const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
89
91
  ffmpegContext.filterComplex.push(videoPart);
90
- if (caption && beat.captionFile) {
91
- const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
92
+ if (caption && studioBeat.captionFile) {
93
+ const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
92
94
  const compositeVideoId = `c${index}`;
93
95
  ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
94
96
  filterComplexVideoIds.push(compositeVideoId);
@@ -96,8 +98,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
96
98
  else {
97
99
  filterComplexVideoIds.push(videoId);
98
100
  }
99
- if (mediaType === "movie") {
100
- const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
101
+ if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
102
+ const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
101
103
  filterComplexAudioIds.push(audioId);
102
104
  ffmpegContext.filterComplex.push(audioPart);
103
105
  }
@@ -102,19 +102,20 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
102
102
  const pos = (() => {
103
103
  if (isLandscapeImage) {
104
104
  const cellHeight = pageHeight / imagesPerPage - offset;
105
- const { drawWidth, drawHeight } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
106
- const x = offset;
105
+ const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
106
+ const x = offset + (containerWidth - drawWidth) / 2;
107
107
  const y = pageHeight - (i + 1) * cellHeight + (cellHeight - drawHeight) * handoutImageRatio;
108
108
  return {
109
109
  x,
110
110
  y,
111
111
  width: drawWidth,
112
112
  height: drawHeight,
113
+ containerWidth,
113
114
  };
114
115
  }
115
116
  else {
116
117
  const cellWidth = pageWidth / imagesPerPage;
117
- const { drawWidth, drawHeight } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
118
+ const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
118
119
  const x = pageWidth - (imagesPerPage - i) * cellWidth + (cellWidth - drawWidth) * handoutImageRatio;
119
120
  const y = pageHeight - drawHeight - offset;
120
121
  return {
@@ -122,6 +123,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
122
123
  y,
123
124
  width: drawWidth,
124
125
  height: drawHeight,
126
+ containerWidth,
125
127
  };
126
128
  }
127
129
  })();
@@ -136,7 +138,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
136
138
  for (const [index, line] of lines.entries()) {
137
139
  page.drawText(line, {
138
140
  ...pos,
139
- x: pos.x + pos.width + textMargin,
141
+ x: offset + pos.containerWidth + textMargin,
140
142
  y: pos.y + pos.height - fontSize - (fontSize + 2) * index,
141
143
  size: fontSize,
142
144
  font,
@@ -7,7 +7,7 @@ import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/strin
7
7
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
8
8
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
9
9
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
10
- const { default: __, ...vanillaAgents } = agents;
10
+ const vanillaAgents = agents.default ?? agents;
11
11
  const translateGraph = {
12
12
  version: 0.5,
13
13
  nodes: {
@@ -6,6 +6,7 @@ export declare const imageOpenaiAgent: AgentFunction<{
6
6
  model: string;
7
7
  size: OpenAIImageSize | null | undefined;
8
8
  moderation: OpenAIModeration | null | undefined;
9
+ images: string[] | null | undefined;
9
10
  }, {
10
11
  buffer: Buffer;
11
12
  }, {