mulmocast 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +10 -1
- package/lib/actions/image_agents.d.ts +3 -12
- package/lib/actions/image_agents.js +12 -8
- package/lib/actions/images.js +2 -1
- package/lib/actions/translate.js +13 -31
- package/lib/agents/lipsync_replicate_agent.js +10 -3
- package/lib/cli/commands/audio/handler.js +1 -1
- package/lib/cli/commands/image/handler.js +1 -1
- package/lib/cli/commands/movie/handler.js +1 -1
- package/lib/cli/commands/pdf/handler.js +1 -1
- package/lib/cli/helpers.d.ts +1 -4
- package/lib/cli/helpers.js +3 -2
- package/lib/mcp/server.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +3 -2
- package/lib/methods/mulmo_script.js +4 -1
- package/lib/methods/mulmo_studio_context.d.ts +1 -0
- package/lib/methods/mulmo_studio_context.js +8 -0
- package/lib/types/agent.d.ts +1 -0
- package/lib/types/schema.d.ts +8 -8
- package/lib/types/schema.js +1 -1
- package/lib/types/type.d.ts +1 -1
- package/lib/utils/const.js +1 -1
- package/lib/utils/context.d.ts +376 -34
- package/lib/utils/context.js +95 -56
- package/lib/utils/filters.d.ts +1 -0
- package/lib/utils/filters.js +8 -0
- package/lib/utils/preprocess.d.ts +2 -2
- package/lib/utils/preprocess.js +3 -3
- package/lib/utils/provider2agent.d.ts +3 -2
- package/lib/utils/provider2agent.js +20 -2
- package/lib/utils/string.d.ts +1 -1
- package/lib/utils/string.js +11 -8
- package/package.json +1 -1
- package/scripts/templates/image_refs.json +1 -0
- package/scripts/templates/voice_over.json +1 -0
- package/scripts/test/gpt.json +1 -0
- package/scripts/test/test1.json +1 -0
- package/scripts/test/test_audio.json +1 -0
- package/scripts/test/test_audio_instructions.json +1 -0
- package/scripts/test/test_beats.json +1 -0
- package/scripts/test/test_captions.json +1 -0
- package/scripts/test/test_elevenlabs_models.json +1 -0
- package/scripts/test/test_hello.json +1 -0
- package/scripts/test/test_hello_google.json +1 -0
- package/scripts/test/test_html.json +1 -0
- package/scripts/test/test_image_refs.json +1 -0
- package/scripts/test/test_images.json +1 -0
- package/scripts/test/test_lang.json +58 -2
- package/scripts/test/test_layout.json +1 -0
- package/scripts/test/test_lipsync.json +9 -0
- package/scripts/test/test_loop.json +1 -0
- package/scripts/test/test_media.json +1 -0
- package/scripts/test/test_mixed_providers.json +1 -0
- package/scripts/test/test_movie.json +1 -0
- package/scripts/test/test_no_audio.json +1 -0
- package/scripts/test/test_no_audio_with_credit.json +1 -0
- package/scripts/test/test_order.json +1 -0
- package/scripts/test/test_order_portrait.json +1 -0
- package/scripts/test/test_replicate.json +19 -0
- package/scripts/test/test_slideout_left_no_audio.json +1 -0
- package/scripts/test/test_spillover.json +1 -0
- package/scripts/test/test_transition.json +1 -0
- package/scripts/test/test_transition_no_audio.json +1 -0
- package/scripts/test/test_video_speed.json +1 -0
- package/scripts/test/test_voice_over.json +1 -0
- package/scripts/test/test_voices.json +1 -0
- package/scripts/templates/image_prompt_only_template.ts +0 -95
package/lib/utils/provider2agent.d.ts
CHANGED

@@ -67,9 +67,10 @@ export declare const provider2LipSyncAgent: {
         defaultModel: ReplicateModel;
         models: ReplicateModel[];
         modelParams: Record<ReplicateModel, {
-            identifier?: `${string}/${string}:${string}`;
-            video: string;
+            identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
+            video?: string;
             audio: string;
+            image?: string;
         }>;
     };
 };
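The widened identifier type above accepts an unpinned owner/model string in addition to the version-pinned form. A minimal sketch of what now type-checks; the standalone ReplicateIdentifier alias is inlined here for illustration and is not an export of the package:

// Illustrative alias inlining the widened template-literal type from the declaration above.
type ReplicateIdentifier = `${string}/${string}:${string}` | `${string}/${string}`;

const pinned: ReplicateIdentifier =
    "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293";
const unpinned: ReplicateIdentifier = "bytedance/omni-human"; // no version hash required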
package/lib/utils/provider2agent.js
CHANGED

@@ -50,6 +50,8 @@ export const provider2MovieAgent = {
             "minimax/video-01",
             "minimax/hailuo-02",
             "pixverse/pixverse-v4.5",
+            "wan-video/wan-2.2-i2v-480p-fast",
+            "wan-video/wan-2.2-t2v-480p-fast",
         ],
         modelParams: {
             "bytedance/seedance-1-lite": {

@@ -110,6 +112,16 @@ export const provider2MovieAgent = {
                 last_image: "last_frame_image",
                 price_per_sec: 0.12,
             },
+            "wan-video/wan-2.2-i2v-480p-fast": {
+                durations: [5],
+                start_image: "image",
+                price_per_sec: 0.012,
+            },
+            "wan-video/wan-2.2-t2v-480p-fast": {
+                durations: [5],
+                start_image: undefined,
+                price_per_sec: 0.012,
+            },
         },
     },
     google: {

@@ -133,8 +145,8 @@ export const provider2SoundEffectAgent = {
 export const provider2LipSyncAgent = {
     replicate: {
         agentName: "lipSyncReplicateAgent",
-        defaultModel: "bytedance/latentsync",
-        models: ["bytedance/latentsync", "tmappdev/lipsync"],
+        defaultModel: "bytedance/omni-human",
+        models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
         modelParams: {
             "bytedance/latentsync": {
                 identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",

@@ -146,6 +158,12 @@ export const provider2LipSyncAgent = {
                 video: "video_input",
                 audio: "audio_input",
             },
+            "bytedance/omni-human": {
+                identifier: "bytedance/omni-human",
+                image: "image",
+                audio: "audio",
+                price_per_sec: 0.14,
+            },
             /* NOTE: This model does not work with large base64 urls.
             "sync/lipsync-2": {
                 video: "video",
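For orientation, a hedged sketch of how a caller might use the new modelParams entries: each entry names the Replicate input fields for its model, and "bytedance/omni-human" takes an image plus audio where the other lip-sync models take a video plus audio. The buildLipSyncInput helper and its argument shape below are hypothetical, not part of mulmocast's API; only the field names come from the entries above.

// Hypothetical helper: maps local assets onto the Replicate input keys
// declared in provider2LipSyncAgent.replicate.modelParams.
type LipSyncModelParams = { identifier?: string; video?: string; audio: string; image?: string };

const buildLipSyncInput = (
    params: LipSyncModelParams,
    assets: { videoUrl?: string; imageUrl?: string; audioUrl: string },
): Record<string, string> => {
    const input: Record<string, string> = { [params.audio]: assets.audioUrl };
    if (params.image && assets.imageUrl) input[params.image] = assets.imageUrl; // e.g. "bytedance/omni-human"
    if (params.video && assets.videoUrl) input[params.video] = assets.videoUrl; // e.g. "bytedance/latentsync"
    return input;
};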
package/lib/utils/string.d.ts
CHANGED
@@ -4,6 +4,6 @@ interface Replacement {
     from: string;
     to: string;
 }
-export declare function replacePairsJa(
+export declare function replacePairsJa(replacements: Replacement[]): (str: string) => string;
 export declare const replacementsJa: Replacement[];
 export {};
package/lib/utils/string.js
CHANGED
@@ -24,14 +24,16 @@ export const recursiveSplitJa = (text) => {
     }, [text])
         .flat(1);
 };
-export function replacePairsJa(
-    return
-
-
-
-
-
-
+export function replacePairsJa(replacements) {
+    return (str) => {
+        return replacements.reduce((tmp, current) => {
+            const { from, to } = current;
+            // Escape any special regex characters in the 'from' string.
+            const escapedFrom = from.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+            const regex = new RegExp(escapedFrom, "g");
+            return tmp.replace(regex, to);
+        }, str);
+    };
 }
 export const replacementsJa = [
     { from: "Anthropic", to: "アンスロピック" },

@@ -51,4 +53,5 @@ export const replacementsJa = [
     { from: "5つ", to: "いつつ" },
     { from: "危険な面", to: "危険なめん" },
     { from: "その通り!", to: "その通り。" },
+    { from: "%", to: "パーセント" },
 ];
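A small usage sketch of the curried replacePairsJa, grounded in the implementation above; the sample sentence is illustrative:

// replacePairsJa(replacements) returns a string -> string function that applies
// every from/to pair globally, escaping regex metacharacters in `from`.
const normalizeJa = replacePairsJa([{ from: "%", to: "パーセント" }]);
normalizeJa("歩留まりは70%です。"); // => "歩留まりは70パーセントです。"

This appears to pair with the new "Text replacement test for nijivoice" beat added in test_lang.json below, so that percent signs are spelled out before speech synthesis.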
package/package.json
CHANGED
package/scripts/test/gpt.json
CHANGED
package/scripts/test/test1.json
CHANGED
@@ -3,6 +3,7 @@
     "version": "1.1",
     "credit": "closing"
   },
+  "lang": "en",
   "title": "The Honey Trap vs. The Frontier: Engineering at Tesla and SpaceX",
   "description": "We dive into Elon Musk's philosophy on building environments where engineers can truly flourish, contrasting the comfort-focused 'honey trap' with the high-expectation culture at Tesla and SpaceX.",
   "speechParams": {
package/scripts/test/test_lang.json
CHANGED

@@ -2,7 +2,7 @@
   "$mulmocast": {
     "version": "1.1"
   },
-  "lang": "
+  "lang": "en",
   "speechParams": {
     "speakers": {
       "Presenter": {

@@ -19,13 +19,69 @@
   },
   "beats": [
     {
-      "text": "
+      "text": "Hello World",
       "image": {
         "type": "textSlide",
         "slide": {
           "title": "Hello World"
         }
       }
+    },
+    {
+      "text": "Switching Language and Switching Screen should match.",
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "Switching Language"
+        }
+      }
+    },
+    {
+      "text": "May the force be with you.",
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "May the force be with you"
+        }
+      }
+    },
+    {
+      "text": "Switching Language and Switching Screen should match even with a short duration.",
+      "duration": 1,
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "Switching Language (short duration)"
+        }
+      }
+    },
+    {
+      "text": "Switching Language and Switching Screen should match even with a long duration.",
+      "duration": 8,
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "Switching Language (long duration)"
+        }
+      }
+    },
+    {
+      "text": "The yield of TSMC's 3nm process exceeeds 70%.",
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "Text replacement test for nijivoice"
+        }
+      }
+    },
+    {
+      "text": "Goodbye",
+      "image": {
+        "type": "textSlide",
+        "slide": {
+          "title": "Goodbye"
+        }
+      }
     }
   ]
 }
package/scripts/test/test_lipsync.json
CHANGED

@@ -33,6 +33,15 @@
         "model": "tmappdev/lipsync"
       }
     },
+    {
+      "id": "future_omni_human_photo_realistic",
+      "text": "In the future, advancements in AI could revolutionize industries like healthcare, education, and transportation.",
+      "imagePrompt": "A female presenter is standing in front of a futuristic cityscape with AI-powered hospital and an autonomous vehicle. Photo realistic.",
+      "enableLipSync": true,
+      "lipSyncParams": {
+        "model": "bytedance/omni-human"
+      }
+    },
     {
       "id": "future_possibilities",
       "text": "In the future, advancements in AI could revolutionize industries like healthcare, education, and transportation.",
@@ -1,5 +1,6 @@
 {
   "$mulmocast": { "version": "1.1" },
+  "lang": "en",
   "beats": [
     {
       "text": "The number 1 stands alone—independent, bold, and often lonely. In stories, the lone hero begins their journey without companions, carving their path through courage. Think of Neo in The Matrix, 'The One', chosen to awaken humanity. Or the number 1 in binary—on or off, light or dark. It reminds us that every great movement starts with a single step, a single decision. Just one person can spark a revolution or change history. As Lao Tzu said, 'The journey of a thousand miles begins with one step.'",
package/scripts/test/test_replicate.json
CHANGED

@@ -121,6 +121,25 @@
       "movieParams": {
         "model": "minimax/hailuo-02"
       }
+    },
+    {
+      "id": "wan-2.2-i2v-480p-fast",
+      "text": "wan-video/wan-2.2-i2v-480p-fast",
+      "duration": 5,
+      "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
+      "moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
+      "movieParams": {
+        "model": "wan-video/wan-2.2-i2v-480p-fast"
+      }
+    },
+    {
+      "id": "wan-2.2-t2v-480p-fast",
+      "text": "wan-video/wan-2.2-t2v-480p-fast",
+      "duration": 5,
+      "moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
+      "movieParams": {
+        "model": "wan-video/wan-2.2-t2v-480p-fast"
+      }
     }
   ]
 }
package/scripts/templates/image_prompt_only_template.ts
REMOVED

@@ -1,95 +0,0 @@
-import {
-    mulmoCastCreditSchema,
-    langSchema,
-}
-export const mulmoPresentationStyleSchema = z.object({
-    $mulmocast: mulmoCastCreditSchema,
-    "lang": "en",
-    title: z.string().default("[TITLE: Brief, engaging title for the topic]")
-    lang: z.string().default("en"),
-    "references": z.array(z.object({
-        "url": "[SOURCE_URL: URL of the source material]",
-        "title": "[SOURCE_TITLE: Title of the referenced article, or paper]",
-        "type": "[SOURCE_TYPE: article, paper]"
-    }
-    ],
-
-
-    description: z.string().optional(),
-    references: z.array(mulmoReferenceSchema).optional(),
-    beats: z.array(mulmoBeatSchema).min(1),
-
-    // TODO: Delete it later
-    imagePath: z.string().optional(), // for keynote images movie ??
-    canvasSize: mulmoCanvasDimensionSchema, // has default value
-    speechParams: z
-        .object({
-            speakers: speakerDictionarySchema,
-        })
-        .default({
-            speakers: {
-                [defaultSpeaker]: {
-                    voiceId: "shimmer",
-                    displayName: {
-                        en: defaultSpeaker,
-                    },
-                },
-            },
-        }),
-    imageParams: mulmoImageParamsSchema.optional().default({
-        provider: defaultProviders.text2image,
-        images: {},
-    }),
-    movieParams: mulmoMovieParamsSchema.optional().default({
-        provider: defaultProviders.text2movie,
-    }),
-    soundEffectParams: mulmoSoundEffectParamsSchema.optional().default({
-        provider: defaultProviders.soundEffect,
-    }),
-    lipSyncParams: mulmoLipSyncParamsSchema.optional(),
-    htmlImageParams: mulmoHtmlImageParamsSchema
-        .extend({
-            provider: text2HtmlImageProviderSchema,
-        })
-        .optional(),
-    // for textSlides
-    textSlideParams: textSlideParamsSchema.optional(),
-    captionParams: mulmoCaptionParamsSchema.optional(),
-    audioParams: audioParamsSchema.default({
-        introPadding: 1.0,
-        padding: 0.3,
-        closingPadding: 0.8,
-        outroPadding: 1.0,
-        bgmVolume: 0.2,
-        audioVolume: 1.0,
-    }),
-});
-
-{
-    "beats": [
-        {
-            "text": "[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]",
-            "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
-        },
-        {
-            "text": "[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]",
-            "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
-        },
-        {
-            "text": "[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]",
-            "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
-        },
-        {
-            "text": "[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]",
-            "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
-        },
-        {
-            "text": "[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]",
-            "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
-        },
-        {
-            "text": "[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]",
-            "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
-        }
-    ]
-}