mulmocast 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.d.ts +0 -1
- package/lib/actions/audio.js +8 -12
- package/lib/actions/images.js +1 -0
- package/lib/actions/movie.js +1 -3
- package/lib/agents/image_openai_agent.js +4 -1
- package/lib/methods/mulmo_presentation_style.d.ts +2 -3
- package/lib/methods/mulmo_presentation_style.js +14 -8
- package/lib/types/agent.d.ts +3 -0
- package/lib/types/schema.d.ts +704 -0
- package/lib/types/schema.js +5 -1
- package/lib/utils/context.d.ts +25 -0
- package/lib/utils/file.d.ts +1 -1
- package/lib/utils/file.js +5 -2
- package/lib/utils/preprocess.d.ts +13 -0
- package/package.json +2 -1
- package/scripts/templates/image_prompt_only_template.ts +95 -0
- package/scripts/test/gpt.json +32 -0
- package/scripts/test/mulmo_story.json +11 -0
- package/scripts/test/test.json +64 -0
- package/scripts/test/test1.json +40 -0
- package/scripts/test/test2.json +66 -0
- package/scripts/test/test_audio.json +151 -0
- package/scripts/test/test_audio_instructions.json +69 -0
- package/scripts/test/test_beats.json +58 -0
- package/scripts/test/test_captions.json +52 -0
- package/scripts/test/test_elevenlabs_models.json +193 -0
- package/scripts/test/test_en.json +29 -0
- package/scripts/test/test_hello.json +17 -0
- package/scripts/test/test_hello_google.json +25 -0
- package/scripts/test/test_html.json +66 -0
- package/scripts/test/test_image_refs.json +49 -0
- package/scripts/test/test_images.json +48 -0
- package/scripts/test/test_lang.json +31 -0
- package/scripts/test/test_layout.json +152 -0
- package/scripts/test/test_lipsync.json +53 -0
- package/scripts/test/test_loop.json +34 -0
- package/scripts/test/test_media.json +244 -0
- package/scripts/test/test_mixed_providers.json +91 -0
- package/scripts/test/test_movie.json +39 -0
- package/scripts/test/test_no_audio.json +252 -0
- package/scripts/test/test_no_audio_with_credit.json +253 -0
- package/scripts/test/test_order.json +68 -0
- package/scripts/test/test_order_portrait.json +72 -0
- package/scripts/test/test_replicate.json +126 -0
- package/scripts/test/test_slideout_left_no_audio.json +45 -0
- package/scripts/test/test_sound_effect.json +41 -0
- package/scripts/test/test_spillover.json +116 -0
- package/scripts/test/test_transition.json +55 -0
- package/scripts/test/test_transition_no_audio.json +45 -0
- package/scripts/test/test_video_speed.json +80 -0
- package/scripts/test/test_voice_over.json +104 -0
- package/scripts/test/test_voices.json +54 -0
package/lib/actions/audio.d.ts
CHANGED
|
@@ -2,6 +2,5 @@ import "dotenv/config";
|
|
|
2
2
|
import type { CallbackFunction } from "graphai";
|
|
3
3
|
import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
|
|
4
4
|
export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
|
|
5
|
-
export declare const audioFilePath: (context: MulmoStudioContext) => string;
|
|
6
5
|
export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
7
6
|
export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
package/lib/actions/audio.js
CHANGED
|
@@ -9,7 +9,7 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
|
|
|
9
9
|
import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
|
|
10
10
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
11
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
12
|
-
import { text2SpeechProviderSchema
|
|
12
|
+
import { text2SpeechProviderSchema } from "../types/index.js";
|
|
13
13
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
14
14
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
15
15
|
import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
|
|
@@ -30,15 +30,15 @@ const getAudioPath = (context, beat, audioFile) => {
|
|
|
30
30
|
}
|
|
31
31
|
return audioFile;
|
|
32
32
|
};
|
|
33
|
-
const getAudioParam = (
|
|
34
|
-
const speaker = MulmoPresentationStyleMethods.getSpeaker(
|
|
33
|
+
const getAudioParam = (context, beat) => {
|
|
34
|
+
const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
|
|
35
35
|
const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
|
|
36
36
|
const provider = text2SpeechProviderSchema.parse(speaker.provider);
|
|
37
37
|
return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
|
|
38
38
|
};
|
|
39
39
|
export const getBeatAudioPath = (text, context, beat, lang) => {
|
|
40
40
|
const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
|
|
41
|
-
const { voiceId, provider, speechOptions, model } = getAudioParam(context
|
|
41
|
+
const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
|
|
42
42
|
const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
|
|
43
43
|
const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
|
|
44
44
|
const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
|
|
@@ -46,9 +46,9 @@ export const getBeatAudioPath = (text, context, beat, lang) => {
|
|
|
46
46
|
};
|
|
47
47
|
const preprocessor = (namedInputs) => {
|
|
48
48
|
const { beat, studioBeat, multiLingual, context } = namedInputs;
|
|
49
|
-
const { lang
|
|
49
|
+
const { lang } = context;
|
|
50
50
|
const text = localizedText(beat, multiLingual, lang);
|
|
51
|
-
const { voiceId, provider, speechOptions, model } = getAudioParam(
|
|
51
|
+
const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
|
|
52
52
|
const audioPath = getBeatAudioPath(text, context, beat, lang);
|
|
53
53
|
studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
|
|
54
54
|
const needsTTS = !beat.audio && audioPath !== undefined;
|
|
@@ -174,11 +174,6 @@ const agentFilters = [
|
|
|
174
174
|
nodeIds: ["tts"],
|
|
175
175
|
},
|
|
176
176
|
];
|
|
177
|
-
export const audioFilePath = (context) => {
|
|
178
|
-
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
179
|
-
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
180
|
-
return getAudioArtifactFilePath(outDirPath, fileName);
|
|
181
|
-
};
|
|
182
177
|
const getConcurrency = (context) => {
|
|
183
178
|
// Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
|
|
184
179
|
const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
|
|
@@ -231,7 +226,7 @@ export const audio = async (context, settings, callbacks) => {
|
|
|
231
226
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
232
227
|
const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
|
|
233
228
|
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
234
|
-
const audioArtifactFilePath =
|
|
229
|
+
const audioArtifactFilePath = getAudioArtifactFilePath(context);
|
|
235
230
|
const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
|
|
236
231
|
const audioCombinedFilePath = getAudioFilePath(audioDirPath, fileName, fileName, context.lang);
|
|
237
232
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
|
|
@@ -253,6 +248,7 @@ export const audio = async (context, settings, callbacks) => {
|
|
|
253
248
|
const result = await graph.run();
|
|
254
249
|
writingMessage(audioCombinedFilePath);
|
|
255
250
|
MulmoStudioContextMethods.setSessionState(context, "audio", false);
|
|
251
|
+
writingMessage(audioArtifactFilePath);
|
|
256
252
|
return result.combineFiles;
|
|
257
253
|
}
|
|
258
254
|
catch (__error) {
|
package/lib/actions/images.js
CHANGED
|
@@ -135,6 +135,7 @@ const beat_graph_data = {
|
|
|
135
135
|
model: ":preprocessor.imageParams.model",
|
|
136
136
|
moderation: ":preprocessor.imageParams.moderation",
|
|
137
137
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
138
|
+
quality: ":preprocessor.imageParams.quality",
|
|
138
139
|
},
|
|
139
140
|
},
|
|
140
141
|
defaultValue: {},
|
package/lib/actions/movie.js
CHANGED
|
@@ -246,9 +246,7 @@ export const movieFilePath = (context) => {
|
|
|
246
246
|
export const movie = async (context) => {
|
|
247
247
|
MulmoStudioContextMethods.setSessionState(context, "video", true);
|
|
248
248
|
try {
|
|
249
|
-
const
|
|
250
|
-
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
251
|
-
const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
|
|
249
|
+
const audioArtifactFilePath = getAudioArtifactFilePath(context);
|
|
252
250
|
const outputVideoPath = movieFilePath(context);
|
|
253
251
|
if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
|
|
254
252
|
writingMessage(outputVideoPath);
|
|
package/lib/agents/image_openai_agent.js
CHANGED
|
@@ -6,7 +6,7 @@ import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
|
6
6
|
// https://platform.openai.com/docs/guides/image-generation
|
|
7
7
|
export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
|
|
8
8
|
const { prompt, referenceImages } = namedInputs;
|
|
9
|
-
const { moderation, canvasSize } = params;
|
|
9
|
+
const { moderation, canvasSize, quality } = params;
|
|
10
10
|
const { apiKey, baseURL } = { ...config };
|
|
11
11
|
const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
|
|
12
12
|
const openai = new OpenAI({ apiKey, baseURL });
|
|
@@ -42,6 +42,9 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
|
|
|
42
42
|
};
|
|
43
43
|
if (model === "gpt-image-1") {
|
|
44
44
|
imageOptions.moderation = moderation || "auto";
|
|
45
|
+
if (quality) {
|
|
46
|
+
imageOptions.quality = quality;
|
|
47
|
+
}
|
|
45
48
|
}
|
|
46
49
|
const response = await (async () => {
|
|
47
50
|
try {
|
|
package/lib/methods/mulmo_presentation_style.d.ts
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
|
|
2
|
+
import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
|
|
3
3
|
export declare const MulmoPresentationStyleMethods: {
|
|
4
4
|
getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
|
|
5
5
|
getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
|
|
6
6
|
getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
|
|
7
7
|
getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
|
|
8
|
-
getSpeaker(
|
|
9
|
-
getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
|
|
8
|
+
getSpeaker(context: MulmoStudioContext, beat: MulmoBeat): SpeakerData;
|
|
10
9
|
getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
|
|
11
10
|
getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
|
|
12
11
|
getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
|
|
package/lib/methods/mulmo_presentation_style.js
CHANGED
|
@@ -46,18 +46,24 @@ export const MulmoPresentationStyleMethods = {
|
|
|
46
46
|
}
|
|
47
47
|
return keys[0];
|
|
48
48
|
},
|
|
49
|
-
getSpeaker(
|
|
50
|
-
userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
|
|
51
|
-
const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
|
|
52
|
-
|
|
53
|
-
const speaker = presentationStyle.speechParams.speakers[speakerId];
|
|
49
|
+
getSpeaker(context, beat) {
|
|
50
|
+
userAssert(!!context.presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
|
|
51
|
+
const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(context.presentationStyle);
|
|
52
|
+
const speaker = context.presentationStyle.speechParams.speakers[speakerId];
|
|
54
53
|
userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
|
|
54
|
+
// Check if the speaker has a language-specific version
|
|
55
|
+
const lang = context.lang ?? context.studio.script.lang;
|
|
56
|
+
if (speaker.lang && lang && speaker.lang[lang]) {
|
|
57
|
+
return speaker.lang[lang];
|
|
58
|
+
}
|
|
55
59
|
return speaker;
|
|
56
60
|
},
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
61
|
+
/* NOTE: This method is not used.
|
|
62
|
+
getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
|
|
63
|
+
const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
|
|
64
|
+
return speaker.model;
|
|
60
65
|
},
|
|
66
|
+
*/
|
|
61
67
|
getText2ImageProvider(provider) {
|
|
62
68
|
return text2ImageProviderSchema.parse(provider);
|
|
63
69
|
},
|
package/lib/types/agent.d.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export type OpenAIImageSize = "1792x1024" | "1024x1792" | "1024x1024" | "1536x1024" | "1024x1536";
|
|
2
2
|
export type OpenAIImageModeration = "low" | "auto";
|
|
3
|
+
export type OpenAIImageQuality = "low" | "medium" | "high" | "auto";
|
|
3
4
|
export type OpenAIImageOptions = {
|
|
4
5
|
model: string;
|
|
5
6
|
prompt: string;
|
|
6
7
|
n: number;
|
|
7
8
|
size: OpenAIImageSize;
|
|
8
9
|
moderation?: OpenAIImageModeration;
|
|
10
|
+
quality?: OpenAIImageQuality;
|
|
9
11
|
};
|
|
10
12
|
export type AgentBufferResult = {
|
|
11
13
|
buffer: Buffer;
|
|
@@ -35,6 +37,7 @@ export type ImageAgentParams = {
|
|
|
35
37
|
};
|
|
36
38
|
export type OpenAIImageAgentParams = ImageAgentParams & {
|
|
37
39
|
moderation: OpenAIImageModeration | null | undefined;
|
|
40
|
+
quality?: OpenAIImageQuality;
|
|
38
41
|
};
|
|
39
42
|
export type OpenAIImageAgentConfig = {
|
|
40
43
|
baseURL?: string;
|