mulmocast 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/akira_comic.json +1 -1
- package/assets/templates/ani.json +3 -3
- package/assets/templates/ani_ja.json +3 -3
- package/assets/templates/business.json +1 -1
- package/assets/templates/characters.json +1 -1
- package/assets/templates/children_book.json +1 -1
- package/assets/templates/coding.json +1 -1
- package/assets/templates/comic_strips.json +1 -1
- package/assets/templates/drslump_comic.json +1 -1
- package/assets/templates/ghibli_comic.json +1 -1
- package/assets/templates/ghibli_image_only.json +1 -1
- package/assets/templates/ghibli_shorts.json +1 -1
- package/assets/templates/ghost_comic.json +1 -1
- package/assets/templates/html.json +1 -1
- package/assets/templates/onepiece_comic.json +1 -1
- package/assets/templates/portrait_movie.json +1 -1
- package/assets/templates/realistic_movie.json +1 -1
- package/assets/templates/sensei_and_taro.json +1 -1
- package/assets/templates/shorts.json +1 -1
- package/assets/templates/text_and_image.json +1 -1
- package/assets/templates/text_only.json +1 -1
- package/assets/templates/trailer.json +1 -1
- package/lib/actions/image_agents.d.ts +24 -0
- package/lib/actions/image_agents.js +9 -0
- package/lib/actions/images.js +35 -6
- package/lib/actions/movie.js +3 -2
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/lipsync_replicate_agent.d.ts +5 -0
- package/lib/agents/lipsync_replicate_agent.js +72 -0
- package/lib/agents/movie_replicate_agent.js +10 -1
- package/lib/cli/commands/tool/prompt/builder.js +2 -2
- package/lib/cli/commands/tool/scripting/builder.js +2 -2
- package/lib/cli/commands/tool/story_to_script/builder.js +2 -2
- package/lib/data/index.d.ts +2 -0
- package/lib/data/index.js +2 -0
- package/lib/data/promptTemplates.d.ts +449 -0
- package/lib/data/promptTemplates.js +571 -0
- package/lib/data/scriptTemplates.d.ts +657 -0
- package/lib/data/scriptTemplates.js +1099 -0
- package/lib/index.browser.d.ts +2 -1
- package/lib/index.browser.js +2 -1
- package/lib/index.common.d.ts +1 -0
- package/lib/index.common.js +1 -0
- package/lib/methods/index.d.ts +0 -1
- package/lib/methods/index.js +0 -1
- package/lib/methods/mulmo_presentation_style.d.ts +10 -0
- package/lib/methods/mulmo_presentation_style.js +8 -1
- package/lib/methods/mulmo_script_template.d.ts +2 -2
- package/lib/tools/create_mulmo_script_from_url.js +14 -2
- package/lib/tools/create_mulmo_script_interactively.js +2 -1
- package/lib/tools/dump_prompt.js +1 -1
- package/lib/tools/story_to_script.js +5 -4
- package/lib/types/agent.d.ts +10 -0
- package/lib/types/schema.d.ts +322 -92
- package/lib/types/schema.js +11 -2
- package/lib/types/type.d.ts +4 -4
- package/lib/utils/context.d.ts +21 -6
- package/lib/utils/context.js +1 -0
- package/lib/utils/file.d.ts +5 -5
- package/lib/utils/file.js +36 -33
- package/lib/utils/inquirer.js +2 -2
- package/lib/utils/preprocess.d.ts +16 -6
- package/lib/utils/prompt.d.ts +1 -2
- package/lib/utils/prompt.js +0 -14
- package/lib/utils/provider2agent.d.ts +13 -0
- package/lib/utils/provider2agent.js +32 -0
- package/lib/utils/system_prompt.d.ts +1 -0
- package/lib/utils/system_prompt.js +1 -0
- package/lib/utils/templates.d.ts +3 -0
- package/lib/utils/templates.js +46 -0
- package/lib/utils/utils.js +3 -0
- package/package.json +8 -4
- package/scripts/templates/business.json +1 -1
- package/scripts/templates/children_book.json +1 -1
- package/scripts/templates/coding.json +1 -1
- package/scripts/templates/html.json +1 -1
- package/scripts/templates/image_prompt_only_template.json +1 -1
- package/scripts/templates/image_prompts_template.json +1 -1
- package/scripts/templates/image_refs.json +1 -1
- package/scripts/templates/movie_prompts_no_text_template.json +1 -1
- package/scripts/templates/movie_prompts_template.json +1 -1
- package/scripts/templates/presentation.json +1 -1
- package/scripts/templates/sensei_and_taro.json +1 -1
- package/scripts/templates/shorts_template.json +1 -1
- package/scripts/templates/text_only_template.json +1 -1
- package/scripts/templates/voice_over.json +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "Presentation with Ani
|
|
3
|
-
"description": "Template for presentation with Ani
|
|
2
|
+
"title": "Presentation with Ani",
|
|
3
|
+
"description": "Template for presentation with Ani.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -44,5 +44,5 @@
|
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
},
|
|
47
|
-
"scriptName": "image_prompts_template
|
|
47
|
+
"scriptName": "image_prompts_template"
|
|
48
48
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "Presentation with Ani",
|
|
3
|
-
"description": "Template for presentation with Ani.",
|
|
2
|
+
"title": "Presentation with Ani in Japanese",
|
|
3
|
+
"description": "Template for presentation with Ani in Japanese.",
|
|
4
4
|
"systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
@@ -40,5 +40,5 @@
|
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
42
|
},
|
|
43
|
-
"scriptName": "image_prompts_template
|
|
43
|
+
"scriptName": "image_prompts_template"
|
|
44
44
|
}
|
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
"title": "Business presentation",
|
|
3
3
|
"description": "Template for business presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a business presentation of the given topic. Use textSlides, markdown, mermaid, or chart to show slides. Extract image links in the article (from <img> tag) to reuse them in the presentation. Mention the reference in one of beats, if it exists. Use the JSON below as a template. chartData is the data for Chart.js",
|
|
5
|
-
"scriptName": "business
|
|
5
|
+
"scriptName": "business"
|
|
6
6
|
}
|
|
@@ -12,5 +12,5 @@
|
|
|
12
12
|
}
|
|
13
13
|
},
|
|
14
14
|
"systemPrompt": "Generate a script for a the given story with multiple characters. Generate image prompts for each character, and make references to them in the beats. Use the JSON below as a template.",
|
|
15
|
-
"scriptName": "image_refs
|
|
15
|
+
"scriptName": "image_refs"
|
|
16
16
|
}
|
|
@@ -15,5 +15,5 @@
|
|
|
15
15
|
"style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
|
|
16
16
|
}
|
|
17
17
|
},
|
|
18
|
-
"scriptName": "children_book
|
|
18
|
+
"scriptName": "children_book"
|
|
19
19
|
}
|
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
"title": "Coding presentation",
|
|
3
3
|
"description": "Template for software and coding presentation.",
|
|
4
4
|
"systemPrompt": "Generate a script for a technical presentation of the given topic. Use markdown with a code block to show some code on a slide. Avoid long coding examples, which may not fit in a single slide. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
-
"scriptName": "coding
|
|
5
|
+
"scriptName": "coding"
|
|
6
6
|
}
|
|
@@ -15,5 +15,5 @@
|
|
|
15
15
|
"style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
|
|
16
16
|
}
|
|
17
17
|
},
|
|
18
|
-
"scriptName": "text_only_template
|
|
18
|
+
"scriptName": "text_only_template"
|
|
19
19
|
}
|
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
"title": "Business presentation in HTML",
|
|
3
3
|
"description": "Template for business presentation in HTML.",
|
|
4
4
|
"systemPrompt": "Generate a script for a business presentation of the given topic. Another LLM will generate actual slides from the prompt and data for each beat. Adding optional data would help it to generate more compelling slide. Mention the reference in one of beats, if it exists. The valid type of reference is 'article', 'paper', 'image', 'video', 'audio'. Use the JSON below as a template.",
|
|
5
|
-
"scriptName": "html
|
|
5
|
+
"scriptName": "html"
|
|
6
6
|
}
|
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
"title": "Text and Image",
|
|
3
3
|
"description": "Template for Text and Image Script.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
-
"scriptName": "image_prompts_template
|
|
5
|
+
"scriptName": "image_prompts_template"
|
|
6
6
|
}
|
|
@@ -2,5 +2,5 @@
|
|
|
2
2
|
"title": "Text Only",
|
|
3
3
|
"description": "Template for Text Only Script.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
-
"scriptName": "text_only_template
|
|
5
|
+
"scriptName": "text_only_template"
|
|
6
6
|
}
|
|
@@ -21,6 +21,14 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
21
21
|
agentName: string;
|
|
22
22
|
defaultModel: string;
|
|
23
23
|
};
|
|
24
|
+
lipSyncFile?: string;
|
|
25
|
+
lipSyncModel?: string;
|
|
26
|
+
lipSyncAgentInfo?: {
|
|
27
|
+
agentName: string;
|
|
28
|
+
defaultModel: string;
|
|
29
|
+
};
|
|
30
|
+
audioFile?: string;
|
|
31
|
+
beatDuration?: number;
|
|
24
32
|
htmlPrompt?: undefined;
|
|
25
33
|
htmlPath?: undefined;
|
|
26
34
|
htmlImageSystemPrompt?: undefined;
|
|
@@ -51,6 +59,14 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
51
59
|
agentName: string;
|
|
52
60
|
defaultModel: string;
|
|
53
61
|
};
|
|
62
|
+
lipSyncFile?: string;
|
|
63
|
+
lipSyncModel?: string;
|
|
64
|
+
lipSyncAgentInfo?: {
|
|
65
|
+
agentName: string;
|
|
66
|
+
defaultModel: string;
|
|
67
|
+
};
|
|
68
|
+
audioFile?: string;
|
|
69
|
+
beatDuration?: number;
|
|
54
70
|
htmlPrompt?: undefined;
|
|
55
71
|
htmlPath?: undefined;
|
|
56
72
|
htmlImageSystemPrompt?: undefined;
|
|
@@ -84,6 +100,14 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
84
100
|
agentName: string;
|
|
85
101
|
defaultModel: string;
|
|
86
102
|
};
|
|
103
|
+
lipSyncFile?: string;
|
|
104
|
+
lipSyncModel?: string;
|
|
105
|
+
lipSyncAgentInfo?: {
|
|
106
|
+
agentName: string;
|
|
107
|
+
defaultModel: string;
|
|
108
|
+
};
|
|
109
|
+
audioFile?: string;
|
|
110
|
+
beatDuration?: number;
|
|
87
111
|
htmlPrompt?: undefined;
|
|
88
112
|
htmlPath?: undefined;
|
|
89
113
|
htmlImageSystemPrompt?: undefined;
|
|
@@ -11,6 +11,7 @@ const htmlStyle = (context, beat) => {
|
|
|
11
11
|
};
|
|
12
12
|
export const imagePreprocessAgent = async (namedInputs) => {
|
|
13
13
|
const { context, beat, index, imageRefs } = namedInputs;
|
|
14
|
+
const studioBeat = context.studio.beats[index];
|
|
14
15
|
const imagePath = getBeatPngImagePath(context, index);
|
|
15
16
|
if (beat.htmlPrompt) {
|
|
16
17
|
const htmlPrompt = MulmoBeatMethods.getHtmlPrompt(beat);
|
|
@@ -22,6 +23,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
22
23
|
const returnValue = {
|
|
23
24
|
imageParams: imageAgentInfo.imageParams,
|
|
24
25
|
movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
|
|
26
|
+
beatDuration: beat.duration ?? studioBeat?.duration,
|
|
25
27
|
};
|
|
26
28
|
if (beat.soundEffectPrompt) {
|
|
27
29
|
returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
|
|
@@ -30,6 +32,13 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
30
32
|
returnValue.soundEffectFile = moviePaths.soundEffectFile;
|
|
31
33
|
returnValue.soundEffectPrompt = beat.soundEffectPrompt;
|
|
32
34
|
}
|
|
35
|
+
if (beat.enableLipSync) {
|
|
36
|
+
returnValue.lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
|
|
37
|
+
returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? returnValue.lipSyncAgentInfo.defaultModel;
|
|
38
|
+
returnValue.lipSyncFile = moviePaths.lipSyncFile;
|
|
39
|
+
// Audio file will be set from the beat's audio file when available
|
|
40
|
+
returnValue.audioFile = studioBeat?.audioFile;
|
|
41
|
+
}
|
|
33
42
|
if (beat.image) {
|
|
34
43
|
const plugin = MulmoBeatMethods.getPlugin(beat);
|
|
35
44
|
const pluginPath = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
|
package/lib/actions/images.js
CHANGED
|
@@ -6,7 +6,7 @@ import * as vanilla from "@graphai/vanilla";
|
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
|
-
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent } from "../agents/index.js";
|
|
9
|
+
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, } from "../agents/index.js";
|
|
10
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
11
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
12
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
@@ -26,11 +26,15 @@ const movieAgents = {
|
|
|
26
26
|
const soundEffectAgents = {
|
|
27
27
|
soundEffectReplicateAgent,
|
|
28
28
|
};
|
|
29
|
+
const lipSyncAgents = {
|
|
30
|
+
lipSyncReplicateAgent,
|
|
31
|
+
};
|
|
29
32
|
const defaultAgents = {
|
|
30
33
|
...vanillaAgents,
|
|
31
34
|
...imageAgents,
|
|
32
35
|
...movieAgents,
|
|
33
36
|
...soundEffectAgents,
|
|
37
|
+
...lipSyncAgents,
|
|
34
38
|
mediaMockAgent,
|
|
35
39
|
fileWriteAgent,
|
|
36
40
|
openAIAgent,
|
|
@@ -151,7 +155,7 @@ const beat_graph_data = {
|
|
|
151
155
|
},
|
|
152
156
|
params: {
|
|
153
157
|
model: ":preprocessor.movieAgentInfo.movieParams.model",
|
|
154
|
-
duration: ":
|
|
158
|
+
duration: ":preprocessor.beatDuration",
|
|
155
159
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
156
160
|
},
|
|
157
161
|
},
|
|
@@ -171,6 +175,7 @@ const beat_graph_data = {
|
|
|
171
175
|
},
|
|
172
176
|
audioChecker: {
|
|
173
177
|
agent: async (namedInputs) => {
|
|
178
|
+
// NOTE: We intentinonally don't check lipSyncFile here.
|
|
174
179
|
if (namedInputs.soundEffectFile) {
|
|
175
180
|
return { hasMovieAudio: true };
|
|
176
181
|
}
|
|
@@ -182,7 +187,7 @@ const beat_graph_data = {
|
|
|
182
187
|
return { hasMovieAudio: hasAudio };
|
|
183
188
|
},
|
|
184
189
|
inputs: {
|
|
185
|
-
onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"],
|
|
190
|
+
onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"],
|
|
186
191
|
movieFile: ":preprocessor.movieFile",
|
|
187
192
|
imageFile: ":preprocessor.imagePath",
|
|
188
193
|
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
@@ -198,7 +203,7 @@ const beat_graph_data = {
|
|
|
198
203
|
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
199
204
|
params: {
|
|
200
205
|
model: ":preprocessor.soundEffectModel",
|
|
201
|
-
duration: ":
|
|
206
|
+
duration: ":preprocessor.beatDuration",
|
|
202
207
|
},
|
|
203
208
|
cache: {
|
|
204
209
|
force: [":context.force"],
|
|
@@ -210,19 +215,43 @@ const beat_graph_data = {
|
|
|
210
215
|
},
|
|
211
216
|
defaultValue: {},
|
|
212
217
|
},
|
|
218
|
+
lipSyncGenerator: {
|
|
219
|
+
if: ":beat.enableLipSync",
|
|
220
|
+
agent: ":preprocessor.lipSyncAgentInfo.agentName",
|
|
221
|
+
inputs: {
|
|
222
|
+
onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
|
|
223
|
+
movieFile: ":preprocessor.movieFile",
|
|
224
|
+
audioFile: ":preprocessor.audioFile",
|
|
225
|
+
lipSyncFile: ":preprocessor.lipSyncFile",
|
|
226
|
+
params: {
|
|
227
|
+
model: ":preprocessor.lipSyncModel",
|
|
228
|
+
duration: ":preprocessor.beatDuration",
|
|
229
|
+
},
|
|
230
|
+
cache: {
|
|
231
|
+
force: [":context.force"],
|
|
232
|
+
file: ":preprocessor.lipSyncFile",
|
|
233
|
+
index: ":__mapIndex",
|
|
234
|
+
sessionType: "lipSync",
|
|
235
|
+
mulmoContext: ":context",
|
|
236
|
+
},
|
|
237
|
+
},
|
|
238
|
+
defaultValue: {},
|
|
239
|
+
},
|
|
213
240
|
output: {
|
|
214
241
|
agent: "copyAgent",
|
|
215
242
|
inputs: {
|
|
216
|
-
onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator"], // to wait for imageFromMovie to finish
|
|
243
|
+
onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator", ":lipSyncGenerator"], // to wait for imageFromMovie, soundEffectGenerator, and lipSyncGenerator to finish
|
|
217
244
|
imageFile: ":preprocessor.imagePath",
|
|
218
245
|
movieFile: ":preprocessor.movieFile",
|
|
219
246
|
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
247
|
+
lipSyncFile: ":preprocessor.lipSyncFile",
|
|
220
248
|
hasMovieAudio: ":audioChecker.hasMovieAudio",
|
|
221
249
|
},
|
|
222
250
|
output: {
|
|
223
251
|
imageFile: ".imageFile",
|
|
224
252
|
movieFile: ".movieFile",
|
|
225
253
|
soundEffectFile: ".soundEffectFile",
|
|
254
|
+
lipSyncFile: ".lipSyncFile",
|
|
226
255
|
hasMovieAudio: ".hasMovieAudio",
|
|
227
256
|
},
|
|
228
257
|
isResult: true,
|
|
@@ -316,7 +345,7 @@ export const graphOption = async (context, settings) => {
|
|
|
316
345
|
{
|
|
317
346
|
name: "fileCacheAgentFilter",
|
|
318
347
|
agent: fileCacheAgentFilter,
|
|
319
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator"],
|
|
348
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator"],
|
|
320
349
|
},
|
|
321
350
|
],
|
|
322
351
|
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
package/lib/actions/movie.js
CHANGED
|
@@ -162,7 +162,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
162
162
|
beatTimestamps.push(timestamp);
|
|
163
163
|
return timestamp; // Skip voice-over beats.
|
|
164
164
|
}
|
|
165
|
-
const sourceFile = studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
|
|
165
|
+
const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
|
|
166
166
|
assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
|
|
167
167
|
assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
|
|
168
168
|
const extraPadding = (() => {
|
|
@@ -183,7 +183,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
183
183
|
const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
|
|
184
184
|
const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
|
|
185
185
|
const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
|
|
186
|
-
const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
|
|
186
|
+
const mediaType = studioBeat.lipSyncFile || studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
|
|
187
187
|
const speed = beat.movieParams?.speed ?? 1.0;
|
|
188
188
|
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
|
|
189
189
|
ffmpegContext.filterComplex.push(videoPart);
|
|
@@ -206,6 +206,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
206
206
|
// NOTE: We don't support audio if the speed is not 1.0.
|
|
207
207
|
const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
|
|
208
208
|
if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
|
|
209
|
+
// TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
|
|
209
210
|
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
|
|
210
211
|
audioIdsFromMovieBeats.push(audioId);
|
|
211
212
|
ffmpegContext.filterComplex.push(audioPart);
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -11,8 +11,9 @@ import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
12
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
13
|
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
14
|
+
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
14
15
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
15
16
|
import { textInputAgent } from "@graphai/input_agents";
|
|
16
17
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
17
18
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
18
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
|
|
19
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -11,9 +11,10 @@ import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
12
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
13
|
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
14
|
+
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
14
15
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
15
16
|
import { textInputAgent } from "@graphai/input_agents";
|
|
16
17
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
17
18
|
// import * as vanilla from "@graphai/vanilla";
|
|
18
19
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
19
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
|
|
20
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, LipSyncAgentInputs, ReplicateLipSyncAgentParams, ReplicateLipSyncAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const lipSyncReplicateAgent: AgentFunction<ReplicateLipSyncAgentParams, AgentBufferResult, LipSyncAgentInputs, ReplicateLipSyncAgentConfig>;
|
|
4
|
+
declare const lipSyncReplicateAgentInfo: AgentFunctionInfo;
|
|
5
|
+
export default lipSyncReplicateAgentInfo;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { GraphAILogger } from "graphai";
|
|
3
|
+
import Replicate from "replicate";
|
|
4
|
+
import { provider2LipSyncAgent } from "../utils/provider2agent.js";
|
|
5
|
+
export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
6
|
+
const { movieFile, audioFile } = namedInputs;
|
|
7
|
+
const apiKey = config?.apiKey;
|
|
8
|
+
const model = params.model ?? provider2LipSyncAgent.replicate.defaultModel;
|
|
9
|
+
if (!apiKey) {
|
|
10
|
+
throw new Error("REPLICATE_API_TOKEN environment variable is required");
|
|
11
|
+
}
|
|
12
|
+
const replicate = new Replicate({
|
|
13
|
+
auth: apiKey,
|
|
14
|
+
});
|
|
15
|
+
const videoBuffer = readFileSync(movieFile);
|
|
16
|
+
const audioBuffer = readFileSync(audioFile);
|
|
17
|
+
const videoUri = `data:video/quicktime;base64,${videoBuffer.toString("base64")}`;
|
|
18
|
+
const audioUri = `data:audio/wav;base64,${audioBuffer.toString("base64")}`;
|
|
19
|
+
const input = {
|
|
20
|
+
video: undefined,
|
|
21
|
+
video_input: undefined,
|
|
22
|
+
video_url: undefined,
|
|
23
|
+
audio: undefined,
|
|
24
|
+
audio_input: undefined,
|
|
25
|
+
audio_file: undefined,
|
|
26
|
+
};
|
|
27
|
+
const modelParams = provider2LipSyncAgent.replicate.modelParams[model];
|
|
28
|
+
if (!modelParams) {
|
|
29
|
+
throw new Error(`Model ${model} is not supported`);
|
|
30
|
+
}
|
|
31
|
+
const videoParam = modelParams.video;
|
|
32
|
+
const audioParam = modelParams.audio;
|
|
33
|
+
if (videoParam === "video" || videoParam === "video_input" || videoParam === "video_url") {
|
|
34
|
+
input[videoParam] = videoUri;
|
|
35
|
+
}
|
|
36
|
+
if (audioParam === "audio" || audioParam === "audio_input" || audioParam === "audio_file") {
|
|
37
|
+
input[audioParam] = audioUri;
|
|
38
|
+
}
|
|
39
|
+
const model_identifier = provider2LipSyncAgent.replicate.modelParams[model]?.identifier ?? model;
|
|
40
|
+
try {
|
|
41
|
+
const output = await replicate.run(model_identifier, {
|
|
42
|
+
input,
|
|
43
|
+
});
|
|
44
|
+
if (output && typeof output === "object" && "url" in output) {
|
|
45
|
+
const videoUrl = output.url();
|
|
46
|
+
const videoResponse = await fetch(videoUrl);
|
|
47
|
+
if (!videoResponse.ok) {
|
|
48
|
+
throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
|
|
49
|
+
}
|
|
50
|
+
const arrayBuffer = await videoResponse.arrayBuffer();
|
|
51
|
+
return { buffer: Buffer.from(arrayBuffer) };
|
|
52
|
+
}
|
|
53
|
+
return undefined;
|
|
54
|
+
}
|
|
55
|
+
catch (error) {
|
|
56
|
+
GraphAILogger.info("Failed to generate lip sync:", error.message);
|
|
57
|
+
throw error;
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
const lipSyncReplicateAgentInfo = {
|
|
61
|
+
name: "lipSyncReplicateAgent",
|
|
62
|
+
agent: lipSyncReplicateAgent,
|
|
63
|
+
mock: lipSyncReplicateAgent,
|
|
64
|
+
samples: [],
|
|
65
|
+
description: "Replicate Lip Sync agent (video + audio to video)",
|
|
66
|
+
category: ["movie"],
|
|
67
|
+
author: "Receptron Team",
|
|
68
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
69
|
+
license: "MIT",
|
|
70
|
+
environmentVariables: ["REPLICATE_API_TOKEN"],
|
|
71
|
+
};
|
|
72
|
+
export default lipSyncReplicateAgentInfo;
|
|
@@ -71,7 +71,16 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
|
71
71
|
if (!provider2MovieAgent.replicate.modelParams[model]) {
|
|
72
72
|
throw new Error(`Model ${model} is not supported`);
|
|
73
73
|
}
|
|
74
|
-
const duration =
|
|
74
|
+
const duration = (() => {
|
|
75
|
+
const durations = provider2MovieAgent.replicate.modelParams[model].durations;
|
|
76
|
+
if (params.duration) {
|
|
77
|
+
const largerDurations = durations.filter((d) => d >= params.duration);
|
|
78
|
+
return largerDurations.length > 0 ? largerDurations[0] : durations[durations.length - 1];
|
|
79
|
+
}
|
|
80
|
+
else {
|
|
81
|
+
return durations[0];
|
|
82
|
+
}
|
|
83
|
+
})();
|
|
75
84
|
if (!provider2MovieAgent.replicate.modelParams[model].durations.includes(duration)) {
|
|
76
85
|
throw new Error(`Duration ${duration} is not supported for model ${model}. Supported durations: ${provider2MovieAgent.replicate.modelParams[model].durations.join(", ")}`);
|
|
77
86
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
const availableTemplateNames =
|
|
1
|
+
import { getAvailablePromptTemplates } from "../../../../utils/file.js";
|
|
2
|
+
const availableTemplateNames = getAvailablePromptTemplates().map((template) => template.filename);
|
|
3
3
|
export const builder = (yargs) => {
|
|
4
4
|
return yargs.option("t", {
|
|
5
5
|
alias: "template",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { llm } from "../../../../utils/provider2agent.js";
|
|
2
|
-
import {
|
|
3
|
-
const availableTemplateNames =
|
|
2
|
+
import { getAvailablePromptTemplates } from "../../../../utils/file.js";
|
|
3
|
+
const availableTemplateNames = getAvailablePromptTemplates().map((template) => template.filename);
|
|
4
4
|
export const builder = (yargs) => {
|
|
5
5
|
return yargs
|
|
6
6
|
.option("o", {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getAvailablePromptTemplates } from "../../../../utils/file.js";
|
|
2
2
|
import { llm } from "../../../../utils/provider2agent.js";
|
|
3
3
|
import { storyToScriptGenerateMode } from "../../../../utils/const.js";
|
|
4
|
-
const availableTemplateNames =
|
|
4
|
+
const availableTemplateNames = getAvailablePromptTemplates().map((template) => template.filename);
|
|
5
5
|
export const builder = (yargs) => {
|
|
6
6
|
return yargs
|
|
7
7
|
.option("o", {
|