mulmocast 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +13 -18
- package/lib/actions/image_agents.d.ts +30 -6
- package/lib/actions/image_agents.js +5 -2
- package/lib/actions/image_references.js +2 -1
- package/lib/actions/images.d.ts +9 -1
- package/lib/actions/images.js +38 -13
- package/lib/actions/movie.js +3 -2
- package/lib/agents/add_bgm_agent.js +1 -1
- package/lib/agents/combine_audio_files_agent.js +10 -7
- package/lib/agents/image_google_agent.js +2 -2
- package/lib/agents/image_openai_agent.js +2 -2
- package/lib/agents/movie_replicate_agent.js +1 -1
- package/lib/agents/tts_elevenlabs_agent.d.ts +2 -1
- package/lib/agents/tts_elevenlabs_agent.js +4 -3
- package/lib/agents/tts_google_agent.d.ts +2 -9
- package/lib/agents/tts_nijivoice_agent.d.ts +2 -1
- package/lib/agents/tts_nijivoice_agent.js +3 -3
- package/lib/agents/tts_openai_agent.d.ts +2 -13
- package/lib/agents/tts_openai_agent.js +4 -3
- package/lib/index.browser.d.ts +1 -0
- package/lib/index.browser.js +1 -0
- package/lib/index.d.ts +1 -0
- package/lib/index.js +2 -0
- package/lib/methods/mulmo_presentation_style.d.ts +2 -1
- package/lib/methods/mulmo_presentation_style.js +21 -17
- package/lib/types/agent.d.ts +29 -2
- package/lib/types/agent.js +0 -1
- package/lib/types/schema.d.ts +596 -485
- package/lib/types/schema.js +15 -11
- package/lib/utils/const.d.ts +0 -1
- package/lib/utils/const.js +0 -1
- package/lib/utils/context.d.ts +36 -30
- package/lib/utils/ffmpeg_utils.d.ts +4 -1
- package/lib/utils/ffmpeg_utils.js +2 -1
- package/lib/utils/preprocess.d.ts +28 -24
- package/lib/utils/provider2agent.d.ts +76 -0
- package/lib/utils/provider2agent.js +87 -0
- package/lib/utils/utils.d.ts +6 -11
- package/lib/utils/utils.js +5 -26
- package/package.json +2 -2
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { userAssert
|
|
2
|
+
import { userAssert } from "../utils/utils.js";
|
|
3
3
|
import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
|
|
4
|
-
import {
|
|
4
|
+
import { defaultProviders, provider2ImageAgent, provider2MovieAgent, provider2LLMAgent } from "../utils/provider2agent.js";
|
|
5
5
|
const defaultTextSlideStyles = [
|
|
6
6
|
'*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
|
|
7
7
|
"body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
|
|
@@ -49,10 +49,14 @@ export const MulmoPresentationStyleMethods = {
|
|
|
49
49
|
userAssert(!!speaker, `speaker is not set: speaker "${beat.speaker}"`);
|
|
50
50
|
return speaker;
|
|
51
51
|
},
|
|
52
|
-
|
|
52
|
+
getTTSProvider(presentationStyle, beat) {
|
|
53
53
|
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
54
54
|
return speaker.provider ?? presentationStyle.speechParams.provider;
|
|
55
55
|
},
|
|
56
|
+
getTTSModel(presentationStyle, beat) {
|
|
57
|
+
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
58
|
+
return speaker.model ?? presentationStyle.speechParams.model;
|
|
59
|
+
},
|
|
56
60
|
getVoiceId(presentationStyle, beat) {
|
|
57
61
|
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
58
62
|
return speaker.voiceId;
|
|
@@ -65,46 +69,46 @@ export const MulmoPresentationStyleMethods = {
|
|
|
65
69
|
// provider and model appropriately.
|
|
66
70
|
const imageParams = { ...presentationStyle.imageParams, ...beat?.imageParams };
|
|
67
71
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(imageParams?.provider);
|
|
72
|
+
const agentInfo = provider2ImageAgent[provider];
|
|
73
|
+
// The default text2image model is gpt-image-1 from OpenAI, and to use it you must have an OpenAI account and have verified your identity. If this is not possible, please specify dall-e-3 as the model.
|
|
68
74
|
const defaultImageParams = {
|
|
69
75
|
provider,
|
|
70
|
-
model:
|
|
76
|
+
model: agentInfo.defaultModel,
|
|
71
77
|
};
|
|
72
78
|
return {
|
|
73
|
-
agent:
|
|
79
|
+
agent: agentInfo.agentName,
|
|
74
80
|
imageParams: { ...defaultImageParams, ...imageParams },
|
|
75
81
|
};
|
|
76
82
|
},
|
|
77
83
|
// Determine movie agent based on provider
|
|
78
84
|
getMovieAgent(presentationStyle) {
|
|
79
|
-
const movieProvider = presentationStyle.movieParams?.provider ??
|
|
80
|
-
|
|
81
|
-
case "replicate":
|
|
82
|
-
return "movieReplicateAgent";
|
|
83
|
-
case "google":
|
|
84
|
-
default:
|
|
85
|
-
return "movieGoogleAgent";
|
|
86
|
-
}
|
|
85
|
+
const movieProvider = (presentationStyle.movieParams?.provider ?? defaultProviders.text2movie);
|
|
86
|
+
return provider2MovieAgent[movieProvider].agentName;
|
|
87
87
|
},
|
|
88
88
|
getConcurrency(presentationStyle) {
|
|
89
|
+
/*
|
|
89
90
|
if (presentationStyle.movieParams?.provider === "replicate") {
|
|
90
|
-
|
|
91
|
+
return 4;
|
|
91
92
|
}
|
|
93
|
+
*/
|
|
92
94
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
|
|
93
95
|
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
94
96
|
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
95
97
|
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
96
98
|
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
97
|
-
|
|
99
|
+
if (imageAgentInfo.imageParams.model === provider2ImageAgent.openai.defaultModel) {
|
|
100
|
+
return 16;
|
|
101
|
+
}
|
|
98
102
|
}
|
|
99
103
|
return 4;
|
|
100
104
|
},
|
|
101
105
|
getHtmlImageAgentInfo(presentationStyle) {
|
|
102
106
|
const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
|
|
103
|
-
const defaultConfig =
|
|
107
|
+
const defaultConfig = provider2LLMAgent[provider];
|
|
104
108
|
const model = presentationStyle.htmlImageParams?.model ? presentationStyle.htmlImageParams?.model : defaultConfig.defaultModel;
|
|
105
109
|
return {
|
|
106
110
|
provider,
|
|
107
|
-
agent: defaultConfig.
|
|
111
|
+
agent: defaultConfig.agentName,
|
|
108
112
|
model,
|
|
109
113
|
max_tokens: defaultConfig.max_tokens,
|
|
110
114
|
};
|
package/lib/types/agent.d.ts
CHANGED
|
@@ -13,6 +13,15 @@ export type AgentBufferResult = {
|
|
|
13
13
|
export type AgentPromptInputs = {
|
|
14
14
|
prompt: string;
|
|
15
15
|
};
|
|
16
|
+
export type AgentTextInputs = {
|
|
17
|
+
text: string;
|
|
18
|
+
};
|
|
19
|
+
export type AgentErrorResult = {
|
|
20
|
+
error: unknown;
|
|
21
|
+
};
|
|
22
|
+
export type AgentConfig = {
|
|
23
|
+
apiKey?: string;
|
|
24
|
+
};
|
|
16
25
|
export type ImageAgentInputs = AgentPromptInputs;
|
|
17
26
|
export type OpenAIImageAgentInputs = AgentPromptInputs & {
|
|
18
27
|
referenceImages: string[] | null | undefined;
|
|
@@ -50,6 +59,24 @@ export type ReplicateMovieAgentParams = {
|
|
|
50
59
|
duration?: number;
|
|
51
60
|
};
|
|
52
61
|
export type GoogleMovieAgentConfig = GoogleImageAgentConfig;
|
|
53
|
-
export type ReplicateMovieAgentConfig =
|
|
54
|
-
|
|
62
|
+
export type ReplicateMovieAgentConfig = AgentConfig;
|
|
63
|
+
export type TTSAgentParams = {
|
|
64
|
+
suppressError: boolean;
|
|
65
|
+
voice: string;
|
|
66
|
+
};
|
|
67
|
+
export type OpenAITTSAgentParams = TTSAgentParams & {
|
|
68
|
+
instructions: string;
|
|
69
|
+
model: string;
|
|
70
|
+
};
|
|
71
|
+
export type NijivoiceTTSAgentParams = TTSAgentParams & {
|
|
72
|
+
speed: number;
|
|
73
|
+
speed_global: number;
|
|
74
|
+
};
|
|
75
|
+
export type GoogleTTSAgentParams = TTSAgentParams & {
|
|
76
|
+
speed: number;
|
|
77
|
+
};
|
|
78
|
+
export type ElevenlabsTTSAgentParams = TTSAgentParams & {
|
|
79
|
+
model: string;
|
|
80
|
+
stability: number;
|
|
81
|
+
similarityBoost: number;
|
|
55
82
|
};
|
package/lib/types/agent.js
CHANGED