mulmocast 0.1.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. package/assets/templates/akira_comic.json +1 -1
  2. package/assets/templates/ani.json +1 -1
  3. package/assets/templates/ani_ja.json +2 -3
  4. package/assets/templates/characters.json +1 -1
  5. package/assets/templates/children_book.json +1 -1
  6. package/assets/templates/comic_strips.json +1 -1
  7. package/assets/templates/drslump_comic.json +1 -1
  8. package/assets/templates/ghibli_comic.json +1 -1
  9. package/assets/templates/ghibli_image_only.json +1 -1
  10. package/assets/templates/ghibli_shorts.json +2 -3
  11. package/assets/templates/ghost_comic.json +1 -1
  12. package/assets/templates/onepiece_comic.json +1 -1
  13. package/assets/templates/portrait_movie.json +1 -1
  14. package/assets/templates/realistic_movie.json +1 -1
  15. package/assets/templates/sensei_and_taro.json +4 -5
  16. package/assets/templates/shorts.json +1 -1
  17. package/assets/templates/trailer.json +1 -1
  18. package/lib/actions/audio.js +6 -7
  19. package/lib/actions/image_agents.d.ts +25 -76
  20. package/lib/actions/image_agents.js +11 -3
  21. package/lib/actions/images.js +36 -4
  22. package/lib/actions/movie.js +1 -1
  23. package/lib/agents/index.d.ts +2 -1
  24. package/lib/agents/index.js +2 -1
  25. package/lib/agents/movie_replicate_agent.js +17 -5
  26. package/lib/agents/sound_effect_replicate_agent.d.ts +5 -0
  27. package/lib/agents/sound_effect_replicate_agent.js +59 -0
  28. package/lib/mcp/server.js +2 -2
  29. package/lib/methods/index.d.ts +1 -0
  30. package/lib/methods/index.js +1 -0
  31. package/lib/methods/mulmo_presentation_style.d.ts +10 -5
  32. package/lib/methods/mulmo_presentation_style.js +24 -20
  33. package/lib/methods/mulmo_script.d.ts +4 -0
  34. package/lib/methods/mulmo_script.js +31 -0
  35. package/lib/types/agent.d.ts +9 -0
  36. package/lib/types/schema.d.ts +396 -244
  37. package/lib/types/schema.js +22 -12
  38. package/lib/types/type.d.ts +2 -3
  39. package/lib/utils/assets.d.ts +18 -0
  40. package/lib/utils/assets.js +101 -0
  41. package/lib/utils/context.d.ts +25 -12
  42. package/lib/utils/context.js +2 -1
  43. package/lib/utils/file.d.ts +4 -1
  44. package/lib/utils/file.js +3 -5
  45. package/lib/utils/preprocess.d.ts +20 -11
  46. package/lib/utils/preprocess.js +7 -5
  47. package/lib/utils/provider2agent.d.ts +19 -1
  48. package/lib/utils/provider2agent.js +73 -0
  49. package/lib/utils/utils.js +3 -0
  50. package/package.json +1 -1
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "movieParams": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "movieParams": {
@@ -23,9 +23,8 @@
23
23
  "height": 1536
24
24
  },
25
25
  "speechParams": {
26
- "provider": "nijivoice",
27
26
  "speakers": {
28
- "Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
27
+ "Presenter": { "provider": "nijivoice", "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
29
28
  }
30
29
  },
31
30
  "imageParams": {
@@ -3,7 +3,7 @@
3
3
  "description": "Template for story with multiple characters.",
4
4
  "presentationStyle": {
5
5
  "$mulmocast": {
6
- "version": "1.0",
6
+ "version": "1.1",
7
7
  "credit": "closing"
8
8
  },
9
9
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must haven an image prompt appropriate for the text.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -12,9 +12,8 @@
12
12
  "height": 1536
13
13
  },
14
14
  "speechParams": {
15
- "provider": "nijivoice",
16
15
  "speakers": {
17
- "Presenter": { "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c", "speechOptions": { "speed": 1.5 } }
16
+ "Presenter": { "provider": "nijivoice", "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c", "speechOptions": { "speed": 1.5 } }
18
17
  }
19
18
  },
20
19
  "imageParams": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0",
7
+ "version": "1.1",
8
8
  "credit": "closing"
9
9
  },
10
10
  "canvasSize": {
@@ -15,11 +15,10 @@
15
15
  "style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
16
16
  },
17
17
  "speechParams": {
18
- "provider": "nijivoice",
19
18
  "speakers": {
20
- "Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c" },
21
- "Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
22
- "Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
19
+ "Announcer": { "provider": "nijivoice", "displayName": { "ja": "アナウンサー" }, "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c" },
20
+ "Student": { "provider": "nijivoice", "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
21
+ "Teacher": { "provider": "nijivoice", "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
23
22
  }
24
23
  }
25
24
  },
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0"
7
+ "version": "1.1"
8
8
  },
9
9
  "canvasSize": {
10
10
  "width": 720,
@@ -4,7 +4,7 @@
4
4
  "systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
5
5
  "presentationStyle": {
6
6
  "$mulmocast": {
7
- "version": "1.0"
7
+ "version": "1.1"
8
8
  },
9
9
  "canvasSize": {
10
10
  "width": 1280,
@@ -9,6 +9,7 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
9
9
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
+ import { text2SpeechProviderSchema, } from "../types/index.js";
12
13
  import { fileCacheAgentFilter } from "../utils/filters.js";
13
14
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
14
15
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
@@ -30,12 +31,10 @@ const getAudioPath = (context, beat, audioFile) => {
30
31
  return audioFile;
31
32
  };
32
33
  const getAudioParam = (presentationStyle, beat) => {
33
- const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
34
- // Use speaker-specific provider if available, otherwise fall back to script-level provider
35
- const provider = MulmoPresentationStyleMethods.getTTSProvider(presentationStyle, beat);
36
- const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
37
- const model = MulmoPresentationStyleMethods.getTTSModel(presentationStyle, beat);
38
- return { voiceId, provider, speechOptions, model };
34
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
35
+ const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
36
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
37
+ return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
39
38
  };
40
39
  export const getBeatAudioPath = (text, context, beat, lang) => {
41
40
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
@@ -183,7 +182,7 @@ export const audioFilePath = (context) => {
183
182
  const getConcurrency = (context) => {
184
183
  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
185
184
  const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
186
- const provider = (speaker.provider ?? context.presentationStyle.speechParams.provider);
185
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
187
186
  return provider2TTSAgent[provider].hasLimitedConcurrency;
188
187
  });
189
188
  return hasLimitedConcurrencyProvider ? 1 : 8;
@@ -1,4 +1,4 @@
1
- import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension } from "../types/index.js";
1
+ import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension, MulmoImageParams } from "../types/index.js";
2
2
  export declare const imagePreprocessAgent: (namedInputs: {
3
3
  context: MulmoStudioContext;
4
4
  beat: MulmoBeat;
@@ -12,32 +12,15 @@ export declare const imagePreprocessAgent: (namedInputs: {
12
12
  } | {
13
13
  imagePath: string | undefined;
14
14
  referenceImageForMovie: string | undefined;
15
- imageParams: {
16
- provider: string;
17
- model?: string | undefined;
18
- style?: string | undefined;
19
- moderation?: string | undefined;
20
- images?: Record<string, {
21
- type: "image";
22
- source: {
23
- url: string;
24
- kind: "url";
25
- } | {
26
- kind: "base64";
27
- data: string;
28
- } | {
29
- text: string;
30
- kind: "text";
31
- } | {
32
- path: string;
33
- kind: "path";
34
- };
35
- } | {
36
- type: "imagePrompt";
37
- prompt: string;
38
- }> | undefined;
39
- };
15
+ imageParams: MulmoImageParams;
40
16
  movieFile: string | undefined;
17
+ soundEffectFile?: string;
18
+ soundEffectPrompt?: string;
19
+ soundEffectModel?: string;
20
+ soundEffectAgentInfo?: {
21
+ agentName: string;
22
+ defaultModel: string;
23
+ };
41
24
  htmlPrompt?: undefined;
42
25
  htmlPath?: undefined;
43
26
  htmlImageSystemPrompt?: undefined;
@@ -59,32 +42,15 @@ export declare const imagePreprocessAgent: (namedInputs: {
59
42
  } | undefined;
60
43
  };
61
44
  };
62
- imageParams: {
63
- provider: string;
64
- model?: string | undefined;
65
- style?: string | undefined;
66
- moderation?: string | undefined;
67
- images?: Record<string, {
68
- type: "image";
69
- source: {
70
- url: string;
71
- kind: "url";
72
- } | {
73
- kind: "base64";
74
- data: string;
75
- } | {
76
- text: string;
77
- kind: "text";
78
- } | {
79
- path: string;
80
- kind: "path";
81
- };
82
- } | {
83
- type: "imagePrompt";
84
- prompt: string;
85
- }> | undefined;
86
- };
45
+ imageParams: MulmoImageParams;
87
46
  movieFile: string | undefined;
47
+ soundEffectFile?: string;
48
+ soundEffectPrompt?: string;
49
+ soundEffectModel?: string;
50
+ soundEffectAgentInfo?: {
51
+ agentName: string;
52
+ defaultModel: string;
53
+ };
88
54
  htmlPrompt?: undefined;
89
55
  htmlPath?: undefined;
90
56
  htmlImageSystemPrompt?: undefined;
@@ -109,32 +75,15 @@ export declare const imagePreprocessAgent: (namedInputs: {
109
75
  } | undefined;
110
76
  };
111
77
  };
112
- imageParams: {
113
- provider: string;
114
- model?: string | undefined;
115
- style?: string | undefined;
116
- moderation?: string | undefined;
117
- images?: Record<string, {
118
- type: "image";
119
- source: {
120
- url: string;
121
- kind: "url";
122
- } | {
123
- kind: "base64";
124
- data: string;
125
- } | {
126
- text: string;
127
- kind: "text";
128
- } | {
129
- path: string;
130
- kind: "path";
131
- };
132
- } | {
133
- type: "imagePrompt";
134
- prompt: string;
135
- }> | undefined;
136
- };
78
+ imageParams: MulmoImageParams;
137
79
  movieFile: string | undefined;
80
+ soundEffectFile?: string;
81
+ soundEffectPrompt?: string;
82
+ soundEffectModel?: string;
83
+ soundEffectAgentInfo?: {
84
+ agentName: string;
85
+ defaultModel: string;
86
+ };
138
87
  htmlPrompt?: undefined;
139
88
  htmlPath?: undefined;
140
89
  htmlImageSystemPrompt?: undefined;
@@ -1,5 +1,5 @@
1
1
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods } from "../methods/index.js";
2
- import { getBeatPngImagePath, getBeatMoviePath } from "../utils/file.js";
2
+ import { getBeatPngImagePath, getBeatMoviePaths } from "../utils/file.js";
3
3
  import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
4
4
  import { renderHTMLToImage } from "../utils/markdown.js";
5
5
  import { GraphAILogger } from "graphai";
@@ -18,10 +18,18 @@ export const imagePreprocessAgent = async (namedInputs) => {
18
18
  return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
19
19
  }
20
20
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
21
+ const moviePaths = getBeatMoviePaths(context, index);
21
22
  const returnValue = {
22
23
  imageParams: imageAgentInfo.imageParams,
23
- movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
24
+ movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
24
25
  };
26
+ if (beat.soundEffectPrompt) {
27
+ returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
28
+ returnValue.soundEffectModel =
29
+ beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
30
+ returnValue.soundEffectFile = moviePaths.soundEffectFile;
31
+ returnValue.soundEffectPrompt = beat.soundEffectPrompt;
32
+ }
25
33
  if (beat.image) {
26
34
  const plugin = MulmoBeatMethods.getPlugin(beat);
27
35
  const pluginPath = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
@@ -29,7 +37,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
29
37
  return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
30
38
  }
31
39
  const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
32
- GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
40
+ GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, returnValue.soundEffectAgentInfo, "\n", beat.moviePrompt, beat.soundEffectPrompt);
33
41
  if (beat.moviePrompt && !beat.imagePrompt) {
34
42
  return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
35
43
  }
@@ -6,7 +6,7 @@ import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
7
7
  import { anthropicAgent } from "@graphai/anthropic_agent";
8
8
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
9
- import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
9
+ import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent } from "../agents/index.js";
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
12
12
  import { fileCacheAgentFilter } from "../utils/filters.js";
@@ -23,10 +23,14 @@ const movieAgents = {
23
23
  movieGoogleAgent,
24
24
  movieReplicateAgent,
25
25
  };
26
+ const soundEffectAgents = {
27
+ soundEffectReplicateAgent,
28
+ };
26
29
  const defaultAgents = {
27
30
  ...vanillaAgents,
28
31
  ...imageAgents,
29
32
  ...movieAgents,
33
+ ...soundEffectAgents,
30
34
  mediaMockAgent,
31
35
  fileWriteAgent,
32
36
  openAIAgent,
@@ -167,6 +171,9 @@ const beat_graph_data = {
167
171
  },
168
172
  audioChecker: {
169
173
  agent: async (namedInputs) => {
174
+ if (namedInputs.soundEffectFile) {
175
+ return { hasMovieAudio: true };
176
+ }
170
177
  const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
171
178
  if (!sourceFile) {
172
179
  return { hasMovieAudio: false };
@@ -175,22 +182,47 @@ const beat_graph_data = {
175
182
  return { hasMovieAudio: hasAudio };
176
183
  },
177
184
  inputs: {
178
- onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
185
+ onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
179
186
  movieFile: ":preprocessor.movieFile",
180
187
  imageFile: ":preprocessor.imagePath",
188
+ soundEffectFile: ":preprocessor.soundEffectFile",
181
189
  },
182
190
  },
191
+ soundEffectGenerator: {
192
+ if: ":preprocessor.soundEffectPrompt",
193
+ agent: ":preprocessor.soundEffectAgentInfo.agentName",
194
+ inputs: {
195
+ onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
196
+ prompt: ":preprocessor.soundEffectPrompt",
197
+ movieFile: ":preprocessor.movieFile",
198
+ soundEffectFile: ":preprocessor.soundEffectFile",
199
+ params: {
200
+ model: ":preprocessor.soundEffectModel",
201
+ duration: ":beat.duration",
202
+ },
203
+ cache: {
204
+ force: [":context.force"],
205
+ file: ":preprocessor.soundEffectFile",
206
+ index: ":__mapIndex",
207
+ sessionType: "soundEffect",
208
+ mulmoContext: ":context",
209
+ },
210
+ },
211
+ defaultValue: {},
212
+ },
183
213
  output: {
184
214
  agent: "copyAgent",
185
215
  inputs: {
186
- onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
216
+ onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator"], // to wait for imageFromMovie to finish
187
217
  imageFile: ":preprocessor.imagePath",
188
218
  movieFile: ":preprocessor.movieFile",
219
+ soundEffectFile: ":preprocessor.soundEffectFile",
189
220
  hasMovieAudio: ":audioChecker.hasMovieAudio",
190
221
  },
191
222
  output: {
192
223
  imageFile: ".imageFile",
193
224
  movieFile: ".movieFile",
225
+ soundEffectFile: ".soundEffectFile",
194
226
  hasMovieAudio: ".hasMovieAudio",
195
227
  },
196
228
  isResult: true,
@@ -284,7 +316,7 @@ export const graphOption = async (context, settings) => {
284
316
  {
285
317
  name: "fileCacheAgentFilter",
286
318
  agent: fileCacheAgentFilter,
287
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
319
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator"],
288
320
  },
289
321
  ],
290
322
  taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
@@ -162,7 +162,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
162
162
  beatTimestamps.push(timestamp);
163
163
  return timestamp; // Skip voice-over beats.
164
164
  }
165
- const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
165
+ const sourceFile = studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
166
166
  assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
167
167
  assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
168
168
  const extraPadding = (() => {
@@ -10,8 +10,9 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
10
10
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
11
11
  import ttsOpenaiAgent from "./tts_openai_agent.js";
12
12
  import validateSchemaAgent from "./validate_schema_agent.js";
13
+ import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
13
14
  import { browserlessAgent } from "@graphai/browserless_agent";
14
15
  import { textInputAgent } from "@graphai/input_agents";
15
16
  import { openAIAgent } from "@graphai/openai_agent";
16
17
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
17
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
18
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
@@ -10,9 +10,10 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
10
10
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
11
11
  import ttsOpenaiAgent from "./tts_openai_agent.js";
12
12
  import validateSchemaAgent from "./validate_schema_agent.js";
13
+ import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
13
14
  import { browserlessAgent } from "@graphai/browserless_agent";
14
15
  import { textInputAgent } from "@graphai/input_agents";
15
16
  import { openAIAgent } from "@graphai/openai_agent";
16
17
  // import * as vanilla from "@graphai/vanilla";
17
18
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
18
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
19
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
@@ -11,6 +11,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
11
11
  duration,
12
12
  image: undefined,
13
13
  start_image: undefined,
14
+ first_frame_image: undefined,
14
15
  aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
15
16
  // resolution: "720p", // only for bytedance/seedance-1-lite
16
17
  // fps: 24, // only for bytedance/seedance-1-lite
@@ -22,15 +23,19 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
22
23
  if (imagePath) {
23
24
  const buffer = readFileSync(imagePath);
24
25
  const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
25
- if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro" || model === "minimax/hailuo-02") {
26
- input.start_image = base64Image;
26
+ const start_image = provider2MovieAgent.replicate.modelParams[model]?.start_image;
27
+ if (start_image === "first_frame_image" || start_image === "image" || start_image === "start_image") {
28
+ input[start_image] = base64Image;
29
+ }
30
+ else if (start_image === undefined) {
31
+ throw new Error(`Model ${model} does not support image-to-video generation`);
27
32
  }
28
33
  else {
29
34
  input.image = base64Image;
30
35
  }
31
36
  }
32
37
  try {
33
- const output = await replicate.run(model ?? provider2MovieAgent.replicate.defaultModel, { input });
38
+ const output = await replicate.run(model, { input });
34
39
  // Download the generated video
35
40
  if (output && typeof output === "object" && "url" in output) {
36
41
  const videoUrl = output.url();
@@ -62,13 +67,20 @@ export const getAspectRatio = (canvasSize) => {
62
67
  export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
63
68
  const { prompt, imagePath } = namedInputs;
64
69
  const aspectRatio = getAspectRatio(params.canvasSize);
65
- const duration = params.duration ?? 5;
70
+ const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
71
+ if (!provider2MovieAgent.replicate.modelParams[model]) {
72
+ throw new Error(`Model ${model} is not supported`);
73
+ }
74
+ const duration = params.duration ?? provider2MovieAgent.replicate.modelParams[model].durations[0] ?? 5;
75
+ if (!provider2MovieAgent.replicate.modelParams[model].durations.includes(duration)) {
76
+ throw new Error(`Duration ${duration} is not supported for model ${model}. Supported durations: ${provider2MovieAgent.replicate.modelParams[model].durations.join(", ")}`);
77
+ }
66
78
  const apiKey = config?.apiKey;
67
79
  if (!apiKey) {
68
80
  throw new Error("REPLICATE_API_TOKEN environment variable is required");
69
81
  }
70
82
  try {
71
- const buffer = await generateMovie(params.model, apiKey, prompt, imagePath, aspectRatio, duration);
83
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration);
72
84
  if (buffer) {
73
85
  return { buffer };
74
86
  }
@@ -0,0 +1,5 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { AgentBufferResult, SoundEffectAgentInputs, ReplicateSoundEffectAgentParams, ReplicateSoundEffectAgentConfig } from "../types/agent.js";
3
+ export declare const soundEffectReplicateAgent: AgentFunction<ReplicateSoundEffectAgentParams, AgentBufferResult, SoundEffectAgentInputs, ReplicateSoundEffectAgentConfig>;
4
+ declare const soundEffectReplicateAgentInfo: AgentFunctionInfo;
5
+ export default soundEffectReplicateAgentInfo;