mulmocast 0.1.7 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/akira_comic.json +1 -1
- package/assets/templates/ani.json +3 -3
- package/assets/templates/ani_ja.json +4 -5
- package/assets/templates/characters.json +1 -1
- package/assets/templates/children_book.json +1 -1
- package/assets/templates/comic_strips.json +1 -1
- package/assets/templates/drslump_comic.json +1 -1
- package/assets/templates/ghibli_comic.json +1 -1
- package/assets/templates/ghibli_image_only.json +1 -1
- package/assets/templates/ghibli_shorts.json +2 -3
- package/assets/templates/ghost_comic.json +1 -1
- package/assets/templates/onepiece_comic.json +1 -1
- package/assets/templates/portrait_movie.json +1 -1
- package/assets/templates/realistic_movie.json +1 -1
- package/assets/templates/sensei_and_taro.json +4 -5
- package/assets/templates/shorts.json +1 -1
- package/assets/templates/trailer.json +1 -1
- package/lib/actions/audio.js +6 -7
- package/lib/actions/image_agents.d.ts +46 -76
- package/lib/actions/image_agents.js +18 -3
- package/lib/actions/images.js +65 -4
- package/lib/actions/movie.js +3 -2
- package/lib/agents/index.d.ts +3 -1
- package/lib/agents/index.js +3 -1
- package/lib/agents/lipsync_replicate_agent.d.ts +5 -0
- package/lib/agents/lipsync_replicate_agent.js +57 -0
- package/lib/agents/movie_replicate_agent.js +17 -5
- package/lib/agents/sound_effect_replicate_agent.d.ts +5 -0
- package/lib/agents/sound_effect_replicate_agent.js +59 -0
- package/lib/data/index.d.ts +2 -0
- package/lib/data/index.js +2 -0
- package/lib/data/promptTemplates.d.ts +695 -0
- package/lib/data/promptTemplates.js +957 -0
- package/lib/data/scriptTemplates.d.ts +233 -0
- package/lib/data/scriptTemplates.js +580 -0
- package/lib/index.browser.d.ts +2 -1
- package/lib/index.browser.js +2 -1
- package/lib/mcp/server.js +2 -2
- package/lib/methods/index.d.ts +1 -0
- package/lib/methods/index.js +1 -0
- package/lib/methods/mulmo_presentation_style.d.ts +18 -5
- package/lib/methods/mulmo_presentation_style.js +31 -20
- package/lib/methods/mulmo_script.d.ts +4 -0
- package/lib/methods/mulmo_script.js +31 -0
- package/lib/tools/story_to_script.js +2 -2
- package/lib/types/agent.d.ts +19 -0
- package/lib/types/schema.d.ts +628 -246
- package/lib/types/schema.js +31 -12
- package/lib/types/type.d.ts +2 -3
- package/lib/utils/assets.d.ts +18 -0
- package/lib/utils/assets.js +101 -0
- package/lib/utils/context.d.ts +40 -12
- package/lib/utils/context.js +3 -1
- package/lib/utils/file.d.ts +12 -4
- package/lib/utils/file.js +48 -24
- package/lib/utils/preprocess.d.ts +30 -11
- package/lib/utils/preprocess.js +7 -5
- package/lib/utils/provider2agent.d.ts +30 -1
- package/lib/utils/provider2agent.js +86 -0
- package/lib/utils/utils.js +6 -0
- package/package.json +8 -4
- package/scripts/templates/business.json +1 -1
- package/scripts/templates/children_book.json +1 -1
- package/scripts/templates/coding.json +1 -1
- package/scripts/templates/html.json +1 -1
- package/scripts/templates/image_prompt_only_template.json +1 -1
- package/scripts/templates/image_prompts_template.json +1 -1
- package/scripts/templates/image_refs.json +1 -1
- package/scripts/templates/movie_prompts_no_text_template.json +1 -1
- package/scripts/templates/movie_prompts_template.json +1 -1
- package/scripts/templates/presentation.json +1 -1
- package/scripts/templates/sensei_and_taro.json +1 -1
- package/scripts/templates/shorts_template.json +1 -1
- package/scripts/templates/text_only_template.json +1 -1
- package/scripts/templates/voice_over.json +1 -1
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "Presentation with Ani
|
|
3
|
-
"description": "Template for presentation with Ani
|
|
2
|
+
"title": "Presentation with Ani",
|
|
3
|
+
"description": "Template for presentation with Ani.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"movieParams": {
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
|
-
"title": "Presentation with Ani",
|
|
3
|
-
"description": "Template for presentation with Ani.",
|
|
2
|
+
"title": "Presentation with Ani in Japanese",
|
|
3
|
+
"description": "Template for presentation with Ani in Japanese.",
|
|
4
4
|
"systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"movieParams": {
|
|
@@ -23,9 +23,8 @@
|
|
|
23
23
|
"height": 1536
|
|
24
24
|
},
|
|
25
25
|
"speechParams": {
|
|
26
|
-
"provider": "nijivoice",
|
|
27
26
|
"speakers": {
|
|
28
|
-
"Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
|
|
27
|
+
"Presenter": { "provider": "nijivoice", "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
|
|
29
28
|
}
|
|
30
29
|
},
|
|
31
30
|
"imageParams": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must haven an image prompt appropriate for the text.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a Japanese script for a Youtube shorts of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -12,9 +12,8 @@
|
|
|
12
12
|
"height": 1536
|
|
13
13
|
},
|
|
14
14
|
"speechParams": {
|
|
15
|
-
"provider": "nijivoice",
|
|
16
15
|
"speakers": {
|
|
17
|
-
"Presenter": { "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c", "speechOptions": { "speed": 1.5 } }
|
|
16
|
+
"Presenter": { "provider": "nijivoice", "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c", "speechOptions": { "speed": 1.5 } }
|
|
18
17
|
}
|
|
19
18
|
},
|
|
20
19
|
"imageParams": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"canvasSize": {
|
|
@@ -15,11 +15,10 @@
|
|
|
15
15
|
"style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
|
|
16
16
|
},
|
|
17
17
|
"speechParams": {
|
|
18
|
-
"provider": "nijivoice",
|
|
19
18
|
"speakers": {
|
|
20
|
-
"Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c" },
|
|
21
|
-
"Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
|
|
22
|
-
"Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
|
|
19
|
+
"Announcer": { "provider": "nijivoice", "displayName": { "ja": "アナウンサー" }, "voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c" },
|
|
20
|
+
"Student": { "provider": "nijivoice", "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
|
|
21
|
+
"Teacher": { "provider": "nijivoice", "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
|
|
23
22
|
}
|
|
24
23
|
}
|
|
25
24
|
},
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a Youtube shorts of the given topic. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1"
|
|
8
8
|
},
|
|
9
9
|
"canvasSize": {
|
|
10
10
|
"width": 720,
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"systemPrompt": "Generate a script for a movie trailer of the given story. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
|
-
"version": "1.
|
|
7
|
+
"version": "1.1"
|
|
8
8
|
},
|
|
9
9
|
"canvasSize": {
|
|
10
10
|
"width": 1280,
|
package/lib/actions/audio.js
CHANGED
|
@@ -9,6 +9,7 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
|
|
|
9
9
|
import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
|
|
10
10
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
11
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
12
|
+
import { text2SpeechProviderSchema, } from "../types/index.js";
|
|
12
13
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
14
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
14
15
|
import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
|
|
@@ -30,12 +31,10 @@ const getAudioPath = (context, beat, audioFile) => {
|
|
|
30
31
|
return audioFile;
|
|
31
32
|
};
|
|
32
33
|
const getAudioParam = (presentationStyle, beat) => {
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
const provider =
|
|
36
|
-
|
|
37
|
-
const model = MulmoPresentationStyleMethods.getTTSModel(presentationStyle, beat);
|
|
38
|
-
return { voiceId, provider, speechOptions, model };
|
|
34
|
+
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
35
|
+
const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
|
|
36
|
+
const provider = text2SpeechProviderSchema.parse(speaker.provider);
|
|
37
|
+
return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
|
|
39
38
|
};
|
|
40
39
|
export const getBeatAudioPath = (text, context, beat, lang) => {
|
|
41
40
|
const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
|
|
@@ -183,7 +182,7 @@ export const audioFilePath = (context) => {
|
|
|
183
182
|
const getConcurrency = (context) => {
|
|
184
183
|
// Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
|
|
185
184
|
const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
|
|
186
|
-
const provider = (speaker.provider
|
|
185
|
+
const provider = text2SpeechProviderSchema.parse(speaker.provider);
|
|
187
186
|
return provider2TTSAgent[provider].hasLimitedConcurrency;
|
|
188
187
|
});
|
|
189
188
|
return hasLimitedConcurrencyProvider ? 1 : 8;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension } from "../types/index.js";
|
|
1
|
+
import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension, MulmoImageParams } from "../types/index.js";
|
|
2
2
|
export declare const imagePreprocessAgent: (namedInputs: {
|
|
3
3
|
context: MulmoStudioContext;
|
|
4
4
|
beat: MulmoBeat;
|
|
@@ -12,32 +12,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
12
12
|
} | {
|
|
13
13
|
imagePath: string | undefined;
|
|
14
14
|
referenceImageForMovie: string | undefined;
|
|
15
|
-
imageParams:
|
|
16
|
-
provider: string;
|
|
17
|
-
model?: string | undefined;
|
|
18
|
-
style?: string | undefined;
|
|
19
|
-
moderation?: string | undefined;
|
|
20
|
-
images?: Record<string, {
|
|
21
|
-
type: "image";
|
|
22
|
-
source: {
|
|
23
|
-
url: string;
|
|
24
|
-
kind: "url";
|
|
25
|
-
} | {
|
|
26
|
-
kind: "base64";
|
|
27
|
-
data: string;
|
|
28
|
-
} | {
|
|
29
|
-
text: string;
|
|
30
|
-
kind: "text";
|
|
31
|
-
} | {
|
|
32
|
-
path: string;
|
|
33
|
-
kind: "path";
|
|
34
|
-
};
|
|
35
|
-
} | {
|
|
36
|
-
type: "imagePrompt";
|
|
37
|
-
prompt: string;
|
|
38
|
-
}> | undefined;
|
|
39
|
-
};
|
|
15
|
+
imageParams: MulmoImageParams;
|
|
40
16
|
movieFile: string | undefined;
|
|
17
|
+
soundEffectFile?: string;
|
|
18
|
+
soundEffectPrompt?: string;
|
|
19
|
+
soundEffectModel?: string;
|
|
20
|
+
soundEffectAgentInfo?: {
|
|
21
|
+
agentName: string;
|
|
22
|
+
defaultModel: string;
|
|
23
|
+
};
|
|
24
|
+
lipSyncFile?: string;
|
|
25
|
+
lipSyncModel?: string;
|
|
26
|
+
lipSyncAgentInfo?: {
|
|
27
|
+
agentName: string;
|
|
28
|
+
defaultModel: string;
|
|
29
|
+
};
|
|
30
|
+
audioFile?: string;
|
|
41
31
|
htmlPrompt?: undefined;
|
|
42
32
|
htmlPath?: undefined;
|
|
43
33
|
htmlImageSystemPrompt?: undefined;
|
|
@@ -59,32 +49,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
59
49
|
} | undefined;
|
|
60
50
|
};
|
|
61
51
|
};
|
|
62
|
-
imageParams:
|
|
63
|
-
provider: string;
|
|
64
|
-
model?: string | undefined;
|
|
65
|
-
style?: string | undefined;
|
|
66
|
-
moderation?: string | undefined;
|
|
67
|
-
images?: Record<string, {
|
|
68
|
-
type: "image";
|
|
69
|
-
source: {
|
|
70
|
-
url: string;
|
|
71
|
-
kind: "url";
|
|
72
|
-
} | {
|
|
73
|
-
kind: "base64";
|
|
74
|
-
data: string;
|
|
75
|
-
} | {
|
|
76
|
-
text: string;
|
|
77
|
-
kind: "text";
|
|
78
|
-
} | {
|
|
79
|
-
path: string;
|
|
80
|
-
kind: "path";
|
|
81
|
-
};
|
|
82
|
-
} | {
|
|
83
|
-
type: "imagePrompt";
|
|
84
|
-
prompt: string;
|
|
85
|
-
}> | undefined;
|
|
86
|
-
};
|
|
52
|
+
imageParams: MulmoImageParams;
|
|
87
53
|
movieFile: string | undefined;
|
|
54
|
+
soundEffectFile?: string;
|
|
55
|
+
soundEffectPrompt?: string;
|
|
56
|
+
soundEffectModel?: string;
|
|
57
|
+
soundEffectAgentInfo?: {
|
|
58
|
+
agentName: string;
|
|
59
|
+
defaultModel: string;
|
|
60
|
+
};
|
|
61
|
+
lipSyncFile?: string;
|
|
62
|
+
lipSyncModel?: string;
|
|
63
|
+
lipSyncAgentInfo?: {
|
|
64
|
+
agentName: string;
|
|
65
|
+
defaultModel: string;
|
|
66
|
+
};
|
|
67
|
+
audioFile?: string;
|
|
88
68
|
htmlPrompt?: undefined;
|
|
89
69
|
htmlPath?: undefined;
|
|
90
70
|
htmlImageSystemPrompt?: undefined;
|
|
@@ -109,32 +89,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
109
89
|
} | undefined;
|
|
110
90
|
};
|
|
111
91
|
};
|
|
112
|
-
imageParams:
|
|
113
|
-
provider: string;
|
|
114
|
-
model?: string | undefined;
|
|
115
|
-
style?: string | undefined;
|
|
116
|
-
moderation?: string | undefined;
|
|
117
|
-
images?: Record<string, {
|
|
118
|
-
type: "image";
|
|
119
|
-
source: {
|
|
120
|
-
url: string;
|
|
121
|
-
kind: "url";
|
|
122
|
-
} | {
|
|
123
|
-
kind: "base64";
|
|
124
|
-
data: string;
|
|
125
|
-
} | {
|
|
126
|
-
text: string;
|
|
127
|
-
kind: "text";
|
|
128
|
-
} | {
|
|
129
|
-
path: string;
|
|
130
|
-
kind: "path";
|
|
131
|
-
};
|
|
132
|
-
} | {
|
|
133
|
-
type: "imagePrompt";
|
|
134
|
-
prompt: string;
|
|
135
|
-
}> | undefined;
|
|
136
|
-
};
|
|
92
|
+
imageParams: MulmoImageParams;
|
|
137
93
|
movieFile: string | undefined;
|
|
94
|
+
soundEffectFile?: string;
|
|
95
|
+
soundEffectPrompt?: string;
|
|
96
|
+
soundEffectModel?: string;
|
|
97
|
+
soundEffectAgentInfo?: {
|
|
98
|
+
agentName: string;
|
|
99
|
+
defaultModel: string;
|
|
100
|
+
};
|
|
101
|
+
lipSyncFile?: string;
|
|
102
|
+
lipSyncModel?: string;
|
|
103
|
+
lipSyncAgentInfo?: {
|
|
104
|
+
agentName: string;
|
|
105
|
+
defaultModel: string;
|
|
106
|
+
};
|
|
107
|
+
audioFile?: string;
|
|
138
108
|
htmlPrompt?: undefined;
|
|
139
109
|
htmlPath?: undefined;
|
|
140
110
|
htmlImageSystemPrompt?: undefined;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods } from "../methods/index.js";
|
|
2
|
-
import { getBeatPngImagePath,
|
|
2
|
+
import { getBeatPngImagePath, getBeatMoviePaths } from "../utils/file.js";
|
|
3
3
|
import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
|
|
4
4
|
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
5
5
|
import { GraphAILogger } from "graphai";
|
|
@@ -18,10 +18,25 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
18
18
|
return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
|
|
19
19
|
}
|
|
20
20
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
|
|
21
|
+
const moviePaths = getBeatMoviePaths(context, index);
|
|
21
22
|
const returnValue = {
|
|
22
23
|
imageParams: imageAgentInfo.imageParams,
|
|
23
|
-
movieFile: beat.moviePrompt ?
|
|
24
|
+
movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
|
|
24
25
|
};
|
|
26
|
+
if (beat.soundEffectPrompt) {
|
|
27
|
+
returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
|
|
28
|
+
returnValue.soundEffectModel =
|
|
29
|
+
beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
|
|
30
|
+
returnValue.soundEffectFile = moviePaths.soundEffectFile;
|
|
31
|
+
returnValue.soundEffectPrompt = beat.soundEffectPrompt;
|
|
32
|
+
}
|
|
33
|
+
if (beat.enableLipSync) {
|
|
34
|
+
returnValue.lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
|
|
35
|
+
returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? returnValue.lipSyncAgentInfo.defaultModel;
|
|
36
|
+
returnValue.lipSyncFile = moviePaths.lipSyncFile;
|
|
37
|
+
// Audio file will be set from the beat's audio file when available
|
|
38
|
+
returnValue.audioFile = context.studio.beats[index]?.audioFile;
|
|
39
|
+
}
|
|
25
40
|
if (beat.image) {
|
|
26
41
|
const plugin = MulmoBeatMethods.getPlugin(beat);
|
|
27
42
|
const pluginPath = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
|
|
@@ -29,7 +44,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
29
44
|
return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
|
|
30
45
|
}
|
|
31
46
|
const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
|
|
32
|
-
GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
|
|
47
|
+
GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, returnValue.soundEffectAgentInfo, "\n", beat.moviePrompt, beat.soundEffectPrompt);
|
|
33
48
|
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
34
49
|
return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
|
|
35
50
|
}
|
package/lib/actions/images.js
CHANGED
|
@@ -6,7 +6,7 @@ import * as vanilla from "@graphai/vanilla";
|
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
|
-
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
9
|
+
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, } from "../agents/index.js";
|
|
10
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
11
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
12
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
@@ -23,10 +23,18 @@ const movieAgents = {
|
|
|
23
23
|
movieGoogleAgent,
|
|
24
24
|
movieReplicateAgent,
|
|
25
25
|
};
|
|
26
|
+
const soundEffectAgents = {
|
|
27
|
+
soundEffectReplicateAgent,
|
|
28
|
+
};
|
|
29
|
+
const lipSyncAgents = {
|
|
30
|
+
lipSyncReplicateAgent,
|
|
31
|
+
};
|
|
26
32
|
const defaultAgents = {
|
|
27
33
|
...vanillaAgents,
|
|
28
34
|
...imageAgents,
|
|
29
35
|
...movieAgents,
|
|
36
|
+
...soundEffectAgents,
|
|
37
|
+
...lipSyncAgents,
|
|
30
38
|
mediaMockAgent,
|
|
31
39
|
fileWriteAgent,
|
|
32
40
|
openAIAgent,
|
|
@@ -167,6 +175,10 @@ const beat_graph_data = {
|
|
|
167
175
|
},
|
|
168
176
|
audioChecker: {
|
|
169
177
|
agent: async (namedInputs) => {
|
|
178
|
+
if (namedInputs.soundEffectFile) {
|
|
179
|
+
// NOTE: We intentinonally don't check lipSyncFile here.
|
|
180
|
+
return { hasMovieAudio: true };
|
|
181
|
+
}
|
|
170
182
|
const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
|
|
171
183
|
if (!sourceFile) {
|
|
172
184
|
return { hasMovieAudio: false };
|
|
@@ -175,22 +187,71 @@ const beat_graph_data = {
|
|
|
175
187
|
return { hasMovieAudio: hasAudio };
|
|
176
188
|
},
|
|
177
189
|
inputs: {
|
|
178
|
-
onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and
|
|
190
|
+
onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"], // to wait for movieGenerator, htmlImageGenerator, soundEffectGenerator, and lipSyncGenerator to finish
|
|
179
191
|
movieFile: ":preprocessor.movieFile",
|
|
180
192
|
imageFile: ":preprocessor.imagePath",
|
|
193
|
+
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
194
|
+
},
|
|
195
|
+
},
|
|
196
|
+
soundEffectGenerator: {
|
|
197
|
+
if: ":preprocessor.soundEffectPrompt",
|
|
198
|
+
agent: ":preprocessor.soundEffectAgentInfo.agentName",
|
|
199
|
+
inputs: {
|
|
200
|
+
onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
|
|
201
|
+
prompt: ":preprocessor.soundEffectPrompt",
|
|
202
|
+
movieFile: ":preprocessor.movieFile",
|
|
203
|
+
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
204
|
+
params: {
|
|
205
|
+
model: ":preprocessor.soundEffectModel",
|
|
206
|
+
duration: ":beat.duration",
|
|
207
|
+
},
|
|
208
|
+
cache: {
|
|
209
|
+
force: [":context.force"],
|
|
210
|
+
file: ":preprocessor.soundEffectFile",
|
|
211
|
+
index: ":__mapIndex",
|
|
212
|
+
sessionType: "soundEffect",
|
|
213
|
+
mulmoContext: ":context",
|
|
214
|
+
},
|
|
215
|
+
},
|
|
216
|
+
defaultValue: {},
|
|
217
|
+
},
|
|
218
|
+
lipSyncGenerator: {
|
|
219
|
+
if: ":beat.enableLipSync",
|
|
220
|
+
agent: ":preprocessor.lipSyncAgentInfo.agentName",
|
|
221
|
+
inputs: {
|
|
222
|
+
onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
|
|
223
|
+
movieFile: ":preprocessor.movieFile",
|
|
224
|
+
audioFile: ":preprocessor.audioFile",
|
|
225
|
+
lipSyncFile: ":preprocessor.lipSyncFile",
|
|
226
|
+
params: {
|
|
227
|
+
model: ":preprocessor.lipSyncModel",
|
|
228
|
+
duration: ":beat.duration",
|
|
229
|
+
},
|
|
230
|
+
cache: {
|
|
231
|
+
force: [":context.force"],
|
|
232
|
+
file: ":preprocessor.lipSyncFile",
|
|
233
|
+
index: ":__mapIndex",
|
|
234
|
+
sessionType: "lipSync",
|
|
235
|
+
mulmoContext: ":context",
|
|
236
|
+
},
|
|
181
237
|
},
|
|
238
|
+
defaultValue: {},
|
|
182
239
|
},
|
|
183
240
|
output: {
|
|
184
241
|
agent: "copyAgent",
|
|
185
242
|
inputs: {
|
|
186
|
-
onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
|
|
243
|
+
onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator", ":lipSyncGenerator"], // to wait for imageFromMovie, soundEffectGenerator, and lipSyncGenerator to finish
|
|
187
244
|
imageFile: ":preprocessor.imagePath",
|
|
188
245
|
movieFile: ":preprocessor.movieFile",
|
|
246
|
+
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
247
|
+
lipSyncFile: ":preprocessor.lipSyncFile",
|
|
189
248
|
hasMovieAudio: ":audioChecker.hasMovieAudio",
|
|
190
249
|
},
|
|
191
250
|
output: {
|
|
192
251
|
imageFile: ".imageFile",
|
|
193
252
|
movieFile: ".movieFile",
|
|
253
|
+
soundEffectFile: ".soundEffectFile",
|
|
254
|
+
lipSyncFile: ".lipSyncFile",
|
|
194
255
|
hasMovieAudio: ".hasMovieAudio",
|
|
195
256
|
},
|
|
196
257
|
isResult: true,
|
|
@@ -284,7 +345,7 @@ export const graphOption = async (context, settings) => {
|
|
|
284
345
|
{
|
|
285
346
|
name: "fileCacheAgentFilter",
|
|
286
347
|
agent: fileCacheAgentFilter,
|
|
287
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
|
|
348
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator", "lipSyncGenerator"],
|
|
288
349
|
},
|
|
289
350
|
],
|
|
290
351
|
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
package/lib/actions/movie.js
CHANGED
|
@@ -162,7 +162,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
162
162
|
beatTimestamps.push(timestamp);
|
|
163
163
|
return timestamp; // Skip voice-over beats.
|
|
164
164
|
}
|
|
165
|
-
const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
|
|
165
|
+
const sourceFile = studioBeat.lipSyncFile ?? studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
|
|
166
166
|
assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
|
|
167
167
|
assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
|
|
168
168
|
const extraPadding = (() => {
|
|
@@ -183,7 +183,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
183
183
|
const defaultFillOption = mulmoFillOptionSchema.parse({}); // let the schema infer the default value
|
|
184
184
|
const fillOption = { ...defaultFillOption, ...globalFillOption, ...beatFillOption };
|
|
185
185
|
const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
|
|
186
|
-
const mediaType = studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
|
|
186
|
+
const mediaType = studioBeat.lipSyncFile || studioBeat.movieFile ? "movie" : MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat);
|
|
187
187
|
const speed = beat.movieParams?.speed ?? 1.0;
|
|
188
188
|
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo, fillOption, speed);
|
|
189
189
|
ffmpegContext.filterComplex.push(videoPart);
|
|
@@ -206,6 +206,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
206
206
|
// NOTE: We don't support audio if the speed is not 1.0.
|
|
207
207
|
const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
|
|
208
208
|
if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
|
|
209
|
+
// TODO: Handle a special case where it has lipSyncFile AND hasMovieAudio is on (the source file has an audio, such as sound effect).
|
|
209
210
|
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
|
|
210
211
|
audioIdsFromMovieBeats.push(audioId);
|
|
211
212
|
ffmpegContext.filterComplex.push(audioPart);
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -10,8 +10,10 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
|
10
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
12
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
|
+
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
14
|
+
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
13
15
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
14
16
|
import { textInputAgent } from "@graphai/input_agents";
|
|
15
17
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
16
18
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
17
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
19
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -10,9 +10,11 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
|
10
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
12
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
|
+
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
14
|
+
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
13
15
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
14
16
|
import { textInputAgent } from "@graphai/input_agents";
|
|
15
17
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
16
18
|
// import * as vanilla from "@graphai/vanilla";
|
|
17
19
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
18
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
20
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, LipSyncAgentInputs, ReplicateLipSyncAgentParams, ReplicateLipSyncAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const lipSyncReplicateAgent: AgentFunction<ReplicateLipSyncAgentParams, AgentBufferResult, LipSyncAgentInputs, ReplicateLipSyncAgentConfig>;
|
|
4
|
+
declare const lipSyncReplicateAgentInfo: AgentFunctionInfo;
|
|
5
|
+
export default lipSyncReplicateAgentInfo;
|