mulmocast 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +18 -3
  2. package/assets/templates/ghibli_shorts.json +34 -0
  3. package/assets/templates/shorts.json +18 -0
  4. package/assets/templates/trailer.json +25 -0
  5. package/lib/actions/audio.d.ts +2 -1
  6. package/lib/actions/audio.js +35 -17
  7. package/lib/actions/captions.js +5 -5
  8. package/lib/actions/images.d.ts +2 -1
  9. package/lib/actions/images.js +90 -58
  10. package/lib/actions/movie.js +53 -16
  11. package/lib/actions/pdf.js +3 -3
  12. package/lib/actions/translate.d.ts +2 -1
  13. package/lib/actions/translate.js +21 -16
  14. package/lib/agents/combine_audio_files_agent.js +4 -0
  15. package/lib/agents/image_google_agent.d.ts +4 -1
  16. package/lib/agents/image_google_agent.js +3 -2
  17. package/lib/agents/image_openai_agent.d.ts +5 -3
  18. package/lib/agents/image_openai_agent.js +35 -7
  19. package/lib/agents/index.d.ts +2 -1
  20. package/lib/agents/index.js +2 -1
  21. package/lib/agents/movie_google_agent.d.ts +9 -2
  22. package/lib/agents/movie_google_agent.js +24 -16
  23. package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
  24. package/lib/agents/tts_elevenlabs_agent.js +60 -0
  25. package/lib/agents/tts_google_agent.js +1 -1
  26. package/lib/agents/tts_nijivoice_agent.js +3 -2
  27. package/lib/agents/tts_openai_agent.js +1 -1
  28. package/lib/cli/commands/audio/handler.js +4 -1
  29. package/lib/cli/commands/image/handler.js +4 -1
  30. package/lib/cli/commands/movie/handler.js +4 -1
  31. package/lib/cli/commands/pdf/handler.js +4 -1
  32. package/lib/cli/commands/translate/handler.js +4 -1
  33. package/lib/cli/helpers.d.ts +3 -3
  34. package/lib/cli/helpers.js +38 -20
  35. package/lib/index.d.ts +5 -0
  36. package/lib/index.js +5 -0
  37. package/lib/methods/mulmo_media_source.d.ts +1 -0
  38. package/lib/methods/mulmo_media_source.js +12 -0
  39. package/lib/methods/mulmo_script.d.ts +1 -1
  40. package/lib/methods/mulmo_script.js +9 -5
  41. package/lib/methods/mulmo_studio_context.d.ts +5 -0
  42. package/lib/methods/mulmo_studio_context.js +23 -0
  43. package/lib/types/index.d.ts +1 -0
  44. package/lib/types/index.js +1 -0
  45. package/lib/types/schema.d.ts +1513 -290
  46. package/lib/types/schema.js +26 -35
  47. package/lib/types/type.d.ts +4 -1
  48. package/lib/utils/file.d.ts +5 -15
  49. package/lib/utils/file.js +14 -21
  50. package/lib/utils/filters.js +4 -4
  51. package/lib/utils/image_plugins/beat.d.ts +4 -0
  52. package/lib/utils/image_plugins/beat.js +7 -0
  53. package/lib/utils/image_plugins/image.d.ts +1 -1
  54. package/lib/utils/image_plugins/index.d.ts +2 -1
  55. package/lib/utils/image_plugins/index.js +2 -1
  56. package/lib/utils/image_plugins/movie.d.ts +1 -1
  57. package/lib/utils/image_plugins/source.js +2 -2
  58. package/lib/utils/preprocess.d.ts +26 -23
  59. package/lib/utils/preprocess.js +4 -0
  60. package/package.json +8 -8
  61. package/scripts/templates/movie_prompts_no_text_template.json +50 -0
  62. package/scripts/templates/shorts_template.json +52 -0
package/README.md CHANGED
@@ -90,11 +90,26 @@ Create a `.env` file in your project directory with the following API keys:
  ```bash
  OPENAI_API_KEY=your_openai_api_key
  ```
- ### Optional
+
+ #### (Optional) For the advanced image generation model
  ```bash
  DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
- GOOGLE_PROJECT_ID=your_google_project_id # for Google's image generation model
- NIJIVOICE_API_KEY=your_nijivoice_api_key # for Nijivoice's TTS model
+ ```
+
+ #### (Optional) For Google's image generation model
+ ```bash
+ GOOGLE_PROJECT_ID=your_google_project_id
+ ```
+
+ See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
+
+ #### (Optional) For Nijivoice's TTS model
+ ```bash
+ NIJIVOICE_API_KEY=your_nijivoice_api_key
+ ```
+
+ #### (Optional) to access web in mulmo tool
+ ```bash
  BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
  ```
 
package/assets/templates/ghibli_shorts.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "title": "Ghibli comic style",
+   "description": "Template for Ghibli-style comic presentation.",
+   "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Presenter": { "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62", "speechOptions": { "speed": 1.5 } }
+       }
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/shorts.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "title": "Short movie template",
+   "description": "Template for Youtube shorts.",
+   "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0"
+     },
+     "canvasSize": {
+       "width": 720,
+       "height": 1280
+     },
+     "imageParams": {
+       "style": "<style>Photo realistic, cinematic.</style>"
+     }
+   },
+   "scriptName": "movie_prompts_template.json"
+ }
package/assets/templates/trailer.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "title": "Movie Trailer template",
+   "description": "Template for A Movie Trailer.",
+   "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0"
+     },
+     "canvasSize": {
+       "width": 1280,
+       "height": 720
+     },
+     "imageParams": {
+       "style": "<style>Photo realistic, cinematic.</style>"
+     },
+     "audioParams": {
+       "padding": 0.0,
+       "introPadding": 0.0,
+       "closingPadding": 0.0,
+       "outroPadding": 2.5,
+       "bgm": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/bgms/trailer_dramatic.mp3" }
+     }
+   },
+   "scriptName": "movie_prompts_no_text_template.json"
+ }
package/lib/actions/audio.d.ts CHANGED
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const audio: (context: MulmoStudioContext) => Promise<void>;
+ export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
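
The same optional `callbacks` parameter is added to `images` (and, per the file list above, `translate`); internally each action just forwards the array to `graph.registerCallback` before `graph.run()`. A minimal caller sketch, assuming `audio` and the context type are reachable from the package root (the diff shows new root exports in `lib/index.js` but not their names) and leaving the callback payload to graphai's `CallbackFunction`:

```typescript
import type { CallbackFunction } from "graphai";
import { audio, type MulmoStudioContext } from "mulmocast"; // assumed root re-exports

declare const context: MulmoStudioContext; // built elsewhere, e.g. by the CLI helpers

// Hypothetical progress hook; the payload shape is defined by graphai, not this diff.
const onUpdate: CallbackFunction = (log) => {
  console.log("graph update:", log);
};

await audio(context, [onUpdate]); // callbacks stay optional: audio(context) still works
```
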
package/lib/actions/audio.js CHANGED
@@ -6,12 +6,14 @@ import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
  import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
  import ttsGoogleAgent from "../agents/tts_google_agent.js";
+ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
+ import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
+ import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
  const vanillaAgents = agents.default ?? agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
@@ -19,10 +21,11 @@ const provider_to_agent = {
    nijivoice: "ttsNijivoiceAgent",
    openai: "ttsOpenaiAgent",
    google: "ttsGoogleAgent",
+   elevenlabs: "ttsElevenlabsAgent",
  };
  const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    if (beat.audio?.type === "audio") {
-     const path = resolveMediaSource(beat.audio.source, context);
+     const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
      if (path) {
        return path;
      }
@@ -34,18 +37,21 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
    return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
  };
  const preprocessor = (namedInputs) => {
-   const { beat, studioBeat, multiLingual, index, context, audioDirPath } = namedInputs;
+   const { beat, studioBeat, multiLingual, context, audioDirPath } = namedInputs;
    const { lang } = context;
-   const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
+   const speaker = context.studio.script.speechParams.speakers[beat.speaker];
+   const voiceId = speaker.voiceId;
    const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
    const text = localizedText(beat, multiLingual, lang);
-   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
-   const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
+   // Use speaker-specific provider if available, otherwise fall back to script-level provider
+   const provider = speaker.provider ?? context.studio.script.speechParams.provider;
+   const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
+   const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
    const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
    studioBeat.audioFile = audioPath;
    const needsTTS = !beat.audio && audioPath !== undefined;
    return {
-     ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
+     ttsAgent: provider_to_agent[provider],
      studioBeat,
      voiceId,
      speechOptions,
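
The cache-relevant change in this hunk is twofold: the resolved provider now participates in the segment hash, and the beat index no longer does, so reordering beats keeps cached audio valid while switching providers invalidates it. A condensed sketch of the two rules, with hypothetical simplified shapes standing in for the real schema types:

```typescript
// Hypothetical, trimmed shapes for illustration; the real ones live in lib/types/schema.
type Speaker = { voiceId: string; provider?: string };
type SpeechParams = { provider: string; speakers: Record<string, Speaker> };

// Speaker-level provider wins; otherwise the script-level default applies.
const resolveProvider = (params: SpeechParams, name: string): string =>
  params.speakers[name].provider ?? params.provider;

// The cache key folds in the provider (and no longer the beat index), so the
// same text rendered by a different provider produces a fresh audio file.
const cacheKey = (text: string, voiceId: string, speed: number, provider: string): string =>
  `${text}${voiceId}${speed}${provider}`;
```
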
@@ -62,7 +68,6 @@ const graph_tts = {
        beat: ":beat",
        studioBeat: ":studioBeat",
        multiLingual: ":multiLingual",
-       index: ":__mapIndex",
        context: ":context",
        audioDirPath: ":audioDirPath",
      },
@@ -74,7 +79,7 @@ const graph_tts = {
        text: ":preprocessor.text",
        file: ":preprocessor.audioPath",
        force: ":context.force",
-       studio: ":context.studio", // for cache
+       mulmoContext: ":context", // for cache
        index: ":__mapIndex", // for cache
        sessionType: "audio", // for cache
        params: {
@@ -96,6 +101,7 @@ const graph_data = {
    outputStudioFilePath: {},
    audioDirPath: {},
    audioSegmentDirPath: {},
+   musicFile: {},
    map: {
      agent: "mapAgent",
      inputs: {
@@ -130,14 +136,14 @@ const graph_data = {
    },
    addBGM: {
      agent: "addBGMAgent",
-     params: {
-       musicFile: process.env.PATH_BGM ?? defaultBGMPath,
-     },
      inputs: {
        wait: ":combineFiles",
        voiceFile: ":audioCombinedFilePath",
        outputFile: ":audioArtifactFilePath",
        script: ":context.studio.script",
+       params: {
+         musicFile: ":musicFile",
+       },
      },
      isResult: true,
    },
@@ -160,9 +166,9 @@ const agentFilters = [
      nodeIds: ["tts"],
    },
  ];
- export const audio = async (context) => {
+ export const audio = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", true);
+     MulmoStudioContextMethods.setSessionState(context, "audio", true);
      const { studio, fileDirs, lang } = context;
      const { outDirPath, audioDirPath } = fileDirs;
      const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -171,13 +177,19 @@ export const audio = async (context) => {
      const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
      mkdir(outDirPath);
      mkdir(audioSegmentDirPath);
-     graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
+     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
+     const hasLimitedConcurrencyProvider = Object.values(studio.script.speechParams.speakers).some((speaker) => {
+       const provider = speaker.provider ?? studio.script.speechParams.provider;
+       return provider === "nijivoice" || provider === "elevenlabs";
+     });
+     graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
      const graph = new GraphAI(graph_data, {
        ...vanillaAgents,
        fileWriteAgent,
        ttsOpenaiAgent,
        ttsNijivoiceAgent,
        ttsGoogleAgent,
+       ttsElevenlabsAgent,
        addBGMAgent,
        combineAudioFilesAgent,
      }, { agentFilters });
@@ -187,10 +199,16 @@ export const audio = async (context) => {
      graph.injectValue("outputStudioFilePath", outputStudioFilePath);
      graph.injectValue("audioSegmentDirPath", audioSegmentDirPath);
      graph.injectValue("audioDirPath", audioDirPath);
+     graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath);
+     if (callbacks) {
+       callbacks.forEach((callback) => {
+         graph.registerCallback(callback);
+       });
+     }
      await graph.run();
      writingMessage(audioCombinedFilePath);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "audio", false);
+     MulmoStudioContextMethods.setSessionState(context, "audio", false);
    }
  };
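
Two behavioral notes fall out of these audio.js hunks. First, TTS concurrency now drops to 1 if any speaker resolves to a rate-limited provider, not only when the script-level provider is nijivoice. Second, background music is resolved through a three-step precedence chain instead of env-var-or-default. A condensed sketch of both rules (shapes are hypothetical simplifications):

```typescript
// Hypothetical, trimmed speaker shape for illustration.
type Speaker = { voiceId: string; provider?: string };

const limitedProviders = new Set(["nijivoice", "elevenlabs"]);

// Serial TTS if ANY speaker lands on a rate-limited provider; otherwise 8-way parallel.
const ttsConcurrency = (speakers: Speaker[], scriptProvider: string): number =>
  speakers.some((s) => limitedProviders.has(s.provider ?? scriptProvider)) ? 1 : 8;

// BGM precedence: script-level audioParams.bgm (already resolved to a path),
// then the PATH_BGM environment variable, then the bundled default.
const pickBgm = (resolvedScriptBgm: string | undefined, defaultBGMPath: string): string =>
  resolvedScriptBgm ?? process.env.PATH_BGM ?? defaultBGMPath;
```
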
package/lib/actions/captions.js CHANGED
@@ -2,7 +2,7 @@ import { GraphAI, GraphAILogger } from "graphai";
  import * as agents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const graph_data = {
    version: 0.5,
@@ -22,7 +22,7 @@ const graph_data = {
      agent: async (namedInputs) => {
        const { beat, context, index } = namedInputs;
        try {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, true);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
          const { fileDirs } = namedInputs.context;
          const { caption } = context;
          const { imageDirPath } = fileDirs;
@@ -47,7 +47,7 @@ const graph_data = {
          return imagePath;
        }
        finally {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "caption", index, false);
+         MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, false);
        }
      },
      inputs: {
@@ -64,12 +64,12 @@ const graph_data = {
  };
  export const captions = async (context) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", true);
+     MulmoStudioContextMethods.setSessionState(context, "caption", true);
      const graph = new GraphAI(graph_data, { ...vanillaAgents });
      graph.injectValue("context", context);
      await graph.run();
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "caption", false);
+     MulmoStudioContextMethods.setSessionState(context, "caption", false);
    }
  };
package/lib/actions/images.d.ts CHANGED
@@ -1,2 +1,3 @@
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const images: (context: MulmoStudioContext) => Promise<void>;
+ export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/images.js CHANGED
@@ -15,7 +15,6 @@ const vanillaAgents = agents.default ?? agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
  const htmlStyle = (script, beat) => {
    return {
      canvasSize: MulmoScriptMethods.getCanvasSize(script),
@@ -25,37 +24,37 @@ const htmlStyle = (script, beat) => {
  const imagePreprocessAgent = async (namedInputs) => {
    const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
    const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
-   if (!imageParams.size) {
-     const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
-     imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
-   }
    const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
    const returnValue = {
-     aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
      imageParams,
+     movieFile: beat.moviePrompt ? `${imageDirPath}/${context.studio.filename}/${index}.mov` : undefined,
    };
    if (beat.image) {
      const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
      if (plugin) {
        try {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, true);
+         MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
          const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
          const path = await plugin.process(processorParams);
          // undefined prompt indicates that image generation is not needed
-         return { path, ...returnValue };
+         return { imagePath: path, ...returnValue };
        }
        finally {
-         MulmoStudioMethods.setBeatSessionState(context.studio, "image", index, false);
+         MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
        }
      }
    }
-   const prompt = imagePrompt(beat, imageParams.style);
+   // images for "edit_image"
    const images = (() => {
      const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
      const sources = imageNames.map((name) => imageRefs[name]);
      return sources.filter((source) => source !== undefined);
    })();
-   return { path: imagePath, prompt, ...returnValue, images };
+   if (beat.moviePrompt && !beat.imagePrompt) {
+     return { ...returnValue, images }; // no image prompt, only movie prompt
+   }
+   const prompt = imagePrompt(beat, imageParams.style);
+   return { imagePath, prompt, ...returnValue, images };
  };
  const graph_data = {
    version: 0.5,
@@ -100,64 +99,53 @@ const graph_data = {
        retry: 3,
        inputs: {
          prompt: ":preprocessor.prompt",
-         file: ":preprocessor.path", // only for fileCacheAgentFilter
+         images: ":preprocessor.images",
+         file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
          text: ":preprocessor.prompt", // only for fileCacheAgentFilter
-         force: ":context.force",
-         studio: ":context.studio", // for cache
-         index: ":__mapIndex", // for cache
-         sessionType: "image", // for cache
+         force: ":context.force", // only for fileCacheAgentFilter
+         mulmoContext: ":context", // for fileCacheAgentFilter
+         index: ":__mapIndex", // for fileCacheAgentFilter
+         sessionType: "image", // for fileCacheAgentFilter
          params: {
            model: ":preprocessor.imageParams.model",
-           size: ":preprocessor.imageParams.size",
            moderation: ":preprocessor.imageParams.moderation",
-           aspectRatio: ":preprocessor.aspectRatio",
-           images: ":preprocessor.images",
+           canvasSize: ":context.studio.script.canvasSize",
          },
        },
        defaultValue: {},
      },
-     prepareMovie: {
-       agent: (namedInputs) => {
-         const { beat, imageDirPath, index, context } = namedInputs;
-         if (beat.moviePrompt) {
-           const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
-           return { movieFile };
-         }
-         return {};
-       },
-       inputs: {
-         result: ":imageGenerator", // to wait for imageGenerator to finish
-         imagePath: ":preprocessor.path",
-         beat: ":beat",
-         imageDirPath: ":imageDirPath",
-         index: ":__mapIndex",
-         context: ":context",
-       },
-     },
      movieGenerator: {
-       if: ":prepareMovie.movieFile",
+       if: ":preprocessor.movieFile",
        agent: "movieGoogleAgent",
        inputs: {
+         onComplete: ":imageGenerator", // to wait for imageGenerator to finish
          prompt: ":beat.moviePrompt",
-         imagePath: ":preprocessor.path",
-         file: ":prepareMovie.movieFile",
+         imagePath: ":preprocessor.imagePath",
+         file: ":preprocessor.movieFile",
          studio: ":context.studio", // for cache
          index: ":__mapIndex", // for cache
          sessionType: "movie", // for cache
          params: {
            model: ":context.studio.script.movieParams.model",
-           aspectRatio: ":preprocessor.aspectRatio",
            duration: ":beat.duration",
+           canvasSize: ":context.studio.script.canvasSize",
          },
        },
        defaultValue: {},
      },
+     onComplete: {
+       agent: "copyAgent",
+       inputs: {
+         onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+         imageFile: ":preprocessor.imagePath",
+         movieFile: ":preprocessor.movieFile",
+       },
+     },
      output: {
        agent: "copyAgent",
        inputs: {
-         onComplete: ":movieGenerator",
-         imageFile: ":preprocessor.path",
-         movieFile: ":prepareMovie.movieFile",
+         imageFile: ":onComplete.imageFile",
+         movieFile: ":onComplete.movieFile",
        },
        isResult: true,
      },
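
Note how removing prepareMovie simplifies the wiring: movieGenerator now takes its target path straight from the preprocessor, and ordering is enforced purely by data dependencies (movieGenerator waits on `:imageGenerator`, the new onComplete node waits on `:movieGenerator`). A stripped-down sketch of just that dependency chain, with node bodies and unrelated inputs omitted:

```typescript
// Dependency chain only; everything else from the real graph is left out.
const pipelineSketch = {
  nodes: {
    imageGenerator: { agent: "imageOpenaiAgent", inputs: { prompt: ":preprocessor.prompt" } },
    movieGenerator: {
      if: ":preprocessor.movieFile", // skipped entirely when the beat has no moviePrompt
      agent: "movieGoogleAgent",
      inputs: { onComplete: ":imageGenerator" }, // runs only after the image exists
    },
    onComplete: {
      agent: "copyAgent",
      inputs: {
        onComplete: ":movieGenerator", // runs after the (possibly skipped) movie step
        imageFile: ":preprocessor.imagePath",
        movieFile: ":preprocessor.movieFile",
      },
    },
    output: {
      agent: "copyAgent",
      inputs: { imageFile: ":onComplete.imageFile", movieFile: ":onComplete.movieFile" },
      isResult: true,
    },
  },
};
```
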
@@ -168,11 +156,26 @@ const graph_data = {
      agent: (namedInputs) => {
        const { array, context } = namedInputs;
        const { studio } = context;
+       const beatIndexMap = {};
        array.forEach((update, index) => {
          const beat = studio.beats[index];
          studio.beats[index] = { ...beat, ...update };
+         const id = studio.script.beats[index].id;
+         if (id) {
+           beatIndexMap[id] = index;
+         }
+       });
+       studio.beats.forEach((studioBeat, index) => {
+         const beat = studio.script.beats[index];
+         if (beat.image?.type === "beat") {
+           if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
+             studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+           }
+           else if (index > 0) {
+             studioBeat.imageFile = studio.beats[index - 1].imageFile;
+           }
+         }
        });
-       // console.log(namedInputs);
        return { studio };
      },
      inputs: {
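
After all per-beat results are merged, a second pass resolves beats whose image is `{ type: "beat" }`: an explicit `id` points at another beat's generated image, otherwise the previous beat's image is reused. The same resolution, extracted as a standalone function with hypothetical minimal types:

```typescript
// Hypothetical, trimmed-down shapes for illustration only.
type ScriptBeat = { id?: string; image?: { type: string; id?: string } };
type StudioBeat = { imageFile?: string };

const resolveBeatImages = (scriptBeats: ScriptBeat[], studioBeats: StudioBeat[]): void => {
  // First pass: index beats by their optional id.
  const beatIndexMap: Record<string, number> = {};
  scriptBeats.forEach((beat, index) => {
    if (beat.id) beatIndexMap[beat.id] = index;
  });
  // Second pass: beats of type "beat" borrow another beat's image file.
  studioBeats.forEach((studioBeat, index) => {
    const beat = scriptBeats[index];
    if (beat.image?.type !== "beat") return;
    if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
      studioBeat.imageFile = studioBeats[beatIndexMap[beat.image.id]].imageFile;
    } else if (index > 0) {
      studioBeat.imageFile = studioBeats[index - 1].imageFile; // default: previous beat
    }
  });
};
```
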
@@ -180,7 +183,7 @@ const graph_data = {
        context: ":context",
      },
    },
-   writeOutout: {
+   writeOutput: {
      // console: { before: true },
      agent: "fileWriteAgent",
      inputs: {
@@ -191,14 +194,20 @@ const graph_data = {
    },
  };
  const googleAuth = async () => {
-   const auth = new GoogleAuth({
-     scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-   });
-   const client = await auth.getClient();
-   const accessToken = await client.getAccessToken();
-   return accessToken.token;
+   try {
+     const auth = new GoogleAuth({
+       scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+     });
+     const client = await auth.getClient();
+     const accessToken = await client.getAccessToken();
+     return accessToken.token;
+   }
+   catch (error) {
+     GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
+     throw error;
+   }
  };
- const generateImages = async (context) => {
+ const generateImages = async (context, callbacks) => {
    const { studio, fileDirs } = context;
    const { outDirPath, imageDirPath } = fileDirs;
    mkdir(`${imageDirPath}/${studio.filename}`);
@@ -248,7 +257,25 @@ const generateImages = async (context) => {
        throw new Error(`Failed to download image: ${image.source.url}`);
      }
      const buffer = Buffer.from(await response.arrayBuffer());
-     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
+     // Detect file extension from Content-Type header or URL
+     const extension = (() => {
+       const contentType = response.headers.get("content-type");
+       if (contentType?.includes("jpeg") || contentType?.includes("jpg")) {
+         return "jpg";
+       }
+       else if (contentType?.includes("png")) {
+         return "png";
+       }
+       else {
+         // Fall back to URL extension
+         const urlExtension = image.source.url.split(".").pop()?.toLowerCase();
+         if (urlExtension && ["jpg", "jpeg", "png"].includes(urlExtension)) {
+           return urlExtension === "jpeg" ? "jpg" : urlExtension;
+         }
+         return "png"; // default
+       }
+     })();
+     const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.${extension}`;
      await fs.promises.writeFile(imagePath, buffer);
      imageRefs[key] = imagePath;
    }
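
The download path previously hard-coded `.png`; the new logic prefers the Content-Type header and falls back to the URL suffix. The same decision table, extracted as a standalone helper for clarity (a hypothetical refactoring, not code in the package):

```typescript
// Hypothetical standalone version of the extension detection above.
const detectExtension = (contentType: string | null, url: string): "jpg" | "png" => {
  if (contentType?.includes("jpeg") || contentType?.includes("jpg")) return "jpg";
  if (contentType?.includes("png")) return "png";
  // Fall back to the URL extension.
  const urlExtension = url.split(".").pop()?.toLowerCase();
  if (urlExtension === "jpg" || urlExtension === "jpeg") return "jpg";
  if (urlExtension === "png") return "png";
  return "png"; // default when nothing matches
};
```
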
@@ -266,14 +293,19 @@ const generateImages = async (context) => {
    Object.keys(injections).forEach((key) => {
      graph.injectValue(key, injections[key]);
    });
+   if (callbacks) {
+     callbacks.forEach((callback) => {
+       graph.registerCallback(callback);
+     });
+   }
    await graph.run();
  };
- export const images = async (context) => {
+ export const images = async (context, callbacks) => {
    try {
-     MulmoStudioMethods.setSessionState(context.studio, "image", true);
-     await generateImages(context);
+     MulmoStudioContextMethods.setSessionState(context, "image", true);
+     await generateImages(context, callbacks);
    }
    finally {
-     MulmoStudioMethods.setSessionState(context.studio, "image", false);
+     MulmoStudioContextMethods.setSessionState(context, "image", false);
    }
  };