mulmocast 0.1.4 → 0.1.5
This diff shows the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- package/lib/actions/audio.js +8 -5
- package/lib/actions/image_agents.d.ts +3 -3
- package/lib/actions/image_references.js +2 -1
- package/lib/actions/images.js +16 -2
- package/lib/actions/movie.js +3 -2
- package/lib/agents/add_bgm_agent.js +1 -1
- package/lib/agents/combine_audio_files_agent.js +10 -7
- package/lib/agents/tts_elevenlabs_agent.d.ts +2 -1
- package/lib/agents/tts_elevenlabs_agent.js +4 -3
- package/lib/agents/tts_google_agent.d.ts +2 -9
- package/lib/agents/tts_nijivoice_agent.d.ts +2 -1
- package/lib/agents/tts_nijivoice_agent.js +3 -3
- package/lib/agents/tts_openai_agent.d.ts +2 -13
- package/lib/agents/tts_openai_agent.js +4 -3
- package/lib/index.browser.d.ts +1 -0
- package/lib/index.browser.js +1 -0
- package/lib/index.js +1 -0
- package/lib/methods/mulmo_presentation_style.d.ts +1 -0
- package/lib/methods/mulmo_presentation_style.js +4 -0
- package/lib/types/agent.d.ts +29 -2
- package/lib/types/agent.js +0 -1
- package/lib/types/schema.d.ts +344 -254
- package/lib/types/schema.js +5 -3
- package/lib/utils/context.d.ts +24 -19
- package/lib/utils/ffmpeg_utils.d.ts +4 -1
- package/lib/utils/ffmpeg_utils.js +2 -1
- package/lib/utils/preprocess.d.ts +21 -18
- package/lib/utils/provider2agent.d.ts +4 -0
- package/lib/utils/provider2agent.js +6 -0
- package/package.json +2 -2
package/lib/actions/audio.js
CHANGED
@@ -34,12 +34,13 @@ const getAudioParam = (presentationStyle, beat) => {
   // Use speaker-specific provider if available, otherwise fall back to script-level provider
   const provider = MulmoPresentationStyleMethods.getTTSProvider(presentationStyle, beat);
   const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
-  return { voiceId, provider, speechOptions };
+  const model = MulmoPresentationStyleMethods.getTTSModel(presentationStyle, beat);
+  return { voiceId, provider, speechOptions, model };
 };
 export const getBeatAudioPath = (text, context, beat, lang) => {
   const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
-  const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
-  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+  const { voiceId, provider, speechOptions, model } = getAudioParam(context.presentationStyle, beat);
+  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
   const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
   const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
   return getAudioPath(context, beat, audioFile);
@@ -48,15 +49,16 @@ const preprocessor = (namedInputs) => {
   const { beat, studioBeat, multiLingual, context } = namedInputs;
   const { lang, presentationStyle } = context;
   const text = localizedText(beat, multiLingual, lang);
-  const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+  const { voiceId, provider, speechOptions, model } = getAudioParam(presentationStyle, beat);
   const audioPath = getBeatAudioPath(text, context, beat, lang);
-  studioBeat.audioFile = audioPath; // TODO
+  studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
   const needsTTS = !beat.audio && audioPath !== undefined;
   return {
     ttsAgent: provider2TTSAgent[provider].agentName,
     text,
     voiceId,
     speechOptions,
+    model,
     audioPath,
     studioBeat,
     needsTTS,
@@ -94,6 +96,7 @@ const graph_tts = {
       voice: ":preprocessor.voiceId",
       speed: ":preprocessor.speechOptions.speed",
       instructions: ":preprocessor.speechOptions.instruction",
+      model: ":preprocessor.model",
     },
   },
 },
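Note: the TTS model is now part of the audio cache key. Anything that changes the rendered audio has to participate in hash_string, otherwise switching models would keep serving a stale cached file. A minimal sketch of the idea (the real code uses the package's text2hash helper; cacheKey below is a hypothetical stand-in):

import { createHash } from "crypto";

// Any parameter that affects TTS output belongs in the key; including `model`
// means a model switch produces a new hash and therefore a fresh audio file.
const cacheKey = (text: string, voiceId: string, provider: string, model?: string): string =>
  createHash("sha256").update([text, voiceId, provider, model ?? ""].join(":")).digest("hex");
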
package/lib/actions/image_agents.d.ts
CHANGED

@@ -14,8 +14,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
   referenceImageForMovie: string | undefined;
   imageParams: {
     provider: string;
-    style?: string | undefined;
     model?: string | undefined;
+    style?: string | undefined;
     moderation?: string | undefined;
     images?: Record<string, {
       type: "image";
@@ -58,8 +58,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
   };
   imageParams: {
     provider: string;
-    style?: string | undefined;
     model?: string | undefined;
+    style?: string | undefined;
     moderation?: string | undefined;
     images?: Record<string, {
       type: "image";
@@ -105,8 +105,8 @@ export declare const imagePreprocessAgent: (namedInputs: {
   };
   imageParams: {
     provider: string;
-    style?: string | undefined;
     model?: string | undefined;
+    style?: string | undefined;
     moderation?: string | undefined;
     images?: Record<string, {
       type: "image";
package/lib/actions/image_references.js
CHANGED

@@ -1,5 +1,5 @@
 import fs from "fs";
-import { GraphAI } from "graphai";
+import { GraphAI, GraphAILogger } from "graphai";
 import { getReferenceImagePath } from "../utils/file.js";
 import { getExtention } from "../utils/utils.js";
 import { graphOption } from "./images.js";
@@ -13,6 +13,7 @@ export const generateReferenceImage = async (inputs) => {
   // generate image
   const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
   const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
+  GraphAILogger.info(`Generating reference image for ${key}: ${prompt}`);
   const image_graph_data = {
     version: 0.5,
     nodes: {
package/lib/actions/images.js
CHANGED
@@ -11,7 +11,7 @@ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../met
 import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
 import { fileCacheAgentFilter } from "../utils/filters.js";
 import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
-import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
+import { extractImageFromMovie, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 import { getImageRefs } from "./image_references.js";
 import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
 const vanillaAgents = vanilla.default ?? vanilla;
@@ -166,16 +166,30 @@ const beat_graph_data = {
       },
       defaultValue: {},
     },
+    audioChecker: {
+      if: ":preprocessor.movieFile",
+      agent: async (namedInputs) => {
+        const { hasAudio } = await ffmpegGetMediaDuration(namedInputs.movieFile);
+        return { hasMovieAudio: hasAudio };
+      },
+      inputs: {
+        onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
+        movieFile: ":preprocessor.movieFile",
+      },
+      defaultValue: {},
+    },
     output: {
      agent: "copyAgent",
      inputs: {
-        onComplete: [":imageFromMovie", ":htmlImageGenerator"], // to wait for imageFromMovie to finish
+        onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
         imageFile: ":preprocessor.imagePath",
         movieFile: ":preprocessor.movieFile",
+        hasMovieAudio: ":audioChecker.hasMovieAudio",
       },
       output: {
         imageFile: ".imageFile",
         movieFile: ".movieFile",
+        hasMovieAudio: ".hasMovieAudio",
       },
       isResult: true,
     },
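Note: the new audioChecker node runs after movieGenerator and reports whether the generated movie actually contains an audio stream; the flag travels through the output node as hasMovieAudio. A sketch of what such a probe can look like, assuming fluent-ffmpeg's ffprobe (the package's own ffmpegGetMediaDuration in ffmpeg_utils is not shown in this diff, but its new { duration, hasAudio } return shape implies similar logic):

import ffmpeg from "fluent-ffmpeg";

// Resolve both the container duration and whether any stream is an audio stream.
const probeMedia = (path: string): Promise<{ duration: number; hasAudio: boolean }> =>
  new Promise((resolve, reject) =>
    ffmpeg.ffprobe(path, (err, metadata) => {
      if (err) return reject(err);
      resolve({
        duration: metadata.format.duration ?? 0,
        hasAudio: metadata.streams.some((s) => s.codec_type === "audio"),
      });
    }),
  );
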
package/lib/actions/movie.js
CHANGED
@@ -204,8 +204,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
     videoIdsForBeats.push(videoId);
   }
   // NOTE: We don't support audio if the speed is not 1.0.
-
-
+  const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
+  if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
+    const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
     audioIdsFromMovieBeats.push(audioId);
     ffmpegContext.filterComplex.push(audioPart);
   }
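Note: with hasMovieAudio now recorded on the studio beat, the movie's own soundtrack is mixed in only when three conditions hold at once. A hypothetical restatement of the guard (getAudioPart and its arguments are the package's own helpers; only the condition is new in 0.1.5):

// Mix the beat's movie audio only if the movie has an audio stream, the user
// hasn't muted it via audioParams.movieVolume, and playback speed is 1.0
// (per the NOTE above, audio is unsupported at altered speeds).
const shouldMixMovieAudio = (hasMovieAudio: boolean, movieVolume: number, speed: number): boolean =>
  hasMovieAudio && movieVolume > 0.0 && speed === 1.0;
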
package/lib/agents/add_bgm_agent.js
CHANGED

@@ -10,7 +10,7 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
   if (!musicFile.match(/^http/) && !fs.existsSync(musicFile)) {
     throw new Error(`AddBGMAgent musicFile not exist: ${musicFile}`);
   }
-  const speechDuration = await ffmpegGetMediaDuration(voiceFile);
+  const { duration: speechDuration } = await ffmpegGetMediaDuration(voiceFile);
   const introPadding = context.presentationStyle.audioParams.introPadding;
   const outroPadding = context.presentationStyle.audioParams.outroPadding;
   const totalDuration = speechDuration + introPadding + outroPadding;
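Note: ffmpegGetMediaDuration previously resolved to a bare number; per the ffmpeg_utils.d.ts entry in the file list it now resolves to an object, so every call site destructures. A self-contained sketch of the assumed shape:

// Assumed 0.1.5 result shape of the helper (see ffmpeg_utils.d.ts above):
type MediaProbe = { duration: number; hasAudio: boolean };
declare const ffmpegGetMediaDuration: (pathOrUrl: string) => Promise<MediaProbe>;
declare const voiceFile: string;

// 0.1.4: const speechDuration = await ffmpegGetMediaDuration(voiceFile); // a number
// 0.1.5: callers pick the field(s) they need.
const { duration: speechDuration, hasAudio } = await ffmpegGetMediaDuration(voiceFile);
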
package/lib/agents/combine_audio_files_agent.js
CHANGED

@@ -2,13 +2,14 @@ import { assert, GraphAILogger } from "graphai";
 import { silent60secPath } from "../utils/file.js";
 import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
 import { userAssert } from "../utils/utils.js";
-const
+const getMovieDuration = async (beat) => {
   if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
     const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
     const speed = beat.movieParams?.speed ?? 1.0;
-
+    const { duration, hasAudio } = await ffmpegGetMediaDuration(pathOrUrl);
+    return { duration: duration / speed, hasAudio };
   }
-  return 0;
+  return { duration: 0, hasAudio: false };
 };
 const getPadding = (context, beat, index) => {
   if (beat.audioParams?.padding !== undefined) {
@@ -29,16 +30,17 @@ const getTotalPadding = (padding, movieDuration, audioDuration, duration) => {
   }
   return padding;
 };
-const
+const getMediaDurationsOfAllBeats = (context) => {
   return Promise.all(context.studio.beats.map(async (studioBeat, index) => {
     const beat = context.studio.script.beats[index];
-    const movieDuration = await
-    const audioDuration = studioBeat.audioFile ? await ffmpegGetMediaDuration(studioBeat.audioFile) : 0;
+    const { duration: movieDuration, hasAudio: hasMovieAudio } = await getMovieDuration(beat);
+    const audioDuration = studioBeat.audioFile ? (await ffmpegGetMediaDuration(studioBeat.audioFile)).duration : 0;
     return {
       movieDuration,
       audioDuration,
       hasMedia: movieDuration + audioDuration > 0,
       silenceDuration: 0,
+      hasMovieAudio,
     };
   }));
 };
@@ -64,7 +66,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
   const { context, combinedFileName } = namedInputs;
   const ffmpegContext = FfmpegContextInit();
   // First, get the audio durations of all beats, taking advantage of multi-threading capability of ffmpeg.
-  const mediaDurations = await
+  const mediaDurations = await getMediaDurationsOfAllBeats(context);
   const beatDurations = [];
   context.studio.script.beats.forEach((beat, index) => {
     if (beatDurations.length > index) {
@@ -196,6 +198,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
       audioDuration: mediaDurations[index].audioDuration,
       movieDuration: mediaDurations[index].movieDuration,
       silenceDuration: mediaDurations[index].silenceDuration,
+      hasMovieAudio: mediaDurations[index].hasMovieAudio,
     })),
   },
 };
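Note: getMediaDurationsOfAllBeats starts every probe before awaiting any of them, so the per-beat ffprobe subprocesses run concurrently; that is what the "multi-threading capability of ffmpeg" comment refers to. A reduced sketch of the pattern (names follow the diff; the real per-beat record also carries hasMedia and silenceDuration):

// N beats -> N concurrent probes; awaiting the Promise.all collects them in order.
const mediaDurations = await Promise.all(
  context.studio.beats.map(async (studioBeat, index) => {
    const { duration: movieDuration, hasAudio: hasMovieAudio } = await getMovieDuration(context.studio.script.beats[index]);
    return { movieDuration, hasMovieAudio };
  }),
);
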
package/lib/agents/tts_elevenlabs_agent.d.ts
CHANGED

@@ -1,4 +1,5 @@
 import type { AgentFunction, AgentFunctionInfo } from "graphai";
-
+import type { ElevenlabsTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, AgentConfig } from "../types/agent.js";
+export declare const ttsElevenlabsAgent: AgentFunction<ElevenlabsTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
 declare const ttsElevenlabsAgentInfo: AgentFunctionInfo;
 export default ttsElevenlabsAgentInfo;
package/lib/agents/tts_elevenlabs_agent.js
CHANGED

@@ -1,5 +1,6 @@
 import { GraphAILogger } from "graphai";
-
+import { provider2TTSAgent } from "../utils/provider2agent.js";
+export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
   const { text } = namedInputs;
   const { voice, model, stability, similarityBoost, suppressError } = params;
   const apiKey = config?.apiKey ?? process.env.ELEVENLABS_API_KEY;
@@ -7,12 +8,12 @@ export const ttsElevenlabsAgent = async ({ namedInputs, params, config }) => {
     throw new Error("ELEVENLABS_API_KEY environment variable is required");
   }
   if (!voice) {
-    throw new Error("Voice ID is required");
+    throw new Error("ELEVENLABS Voice ID is required");
   }
   try {
     const requestBody = {
       text,
-      model_id: model ??
+      model_id: model ?? provider2TTSAgent.elevenlabs.defaultModel,
       voice_settings: {
         stability: stability ?? 0.5,
         similarity_boost: similarityBoost ?? 0.75,
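Note: hard-coded fallback models and voices are replaced by lookups into provider2TTSAgent (see the utils/provider2agent.js lines added per the file list; they are not shown in this diff). A hypothetical shape of that registry, with placeholder values since the diff doesn't reveal them:

// Each provider entry pairs the GraphAI agent name with its defaults;
// the "..." strings below are placeholders, not the real values.
const provider2TTSAgent = {
  openai: { agentName: "ttsOpenaiAgent", defaultModel: "...", defaultVoice: "..." },
  elevenlabs: { agentName: "ttsElevenlabsAgent", defaultModel: "..." },
} as const;
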
package/lib/agents/tts_google_agent.d.ts
CHANGED

@@ -1,12 +1,5 @@
 import type { AgentFunction, AgentFunctionInfo } from "graphai";
-
-
-    speed: number;
-    suppressError: boolean;
-}, {
-    buffer?: Buffer | null;
-}, {
-    text: string;
-}>;
+import type { GoogleTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult } from "../types/agent.js";
+export declare const ttsGoogleAgent: AgentFunction<GoogleTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs>;
 declare const ttsGoogleAgentInfo: AgentFunctionInfo;
 export default ttsGoogleAgentInfo;
package/lib/agents/tts_nijivoice_agent.d.ts
CHANGED

@@ -1,4 +1,5 @@
 import type { AgentFunction, AgentFunctionInfo } from "graphai";
-
+import type { NijivoiceTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, AgentConfig } from "../types/agent.js";
+export declare const ttsNijivoiceAgent: AgentFunction<NijivoiceTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
 declare const ttsNijivoiceAgentInfo: AgentFunctionInfo;
 export default ttsNijivoiceAgentInfo;
package/lib/agents/tts_nijivoice_agent.js
CHANGED

@@ -6,11 +6,11 @@ const errorMessage = [
   "1. Obtain an API key from Niji Voice (https://platform.nijivoice.com/) and set it as the NIJIVOICE_API_KEY environment variable.",
   '2. Use OpenAI\'s TTS instead of Niji Voice by changing speechParams.provider from "nijivoice" to "openai".',
 ].join("\n");
-export const ttsNijivoiceAgent = async ({ params, namedInputs, config }) => {
+export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
   const { suppressError, voice, speed, speed_global } = params;
   const { apiKey } = config ?? {};
   const { text } = namedInputs;
-  assert(apiKey ?? nijovoiceApiKey, errorMessage);
+  assert(!!(apiKey ?? nijovoiceApiKey), errorMessage);
   const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
   const options = {
     method: "POST",
@@ -31,7 +31,7 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config }) => {
   if (voiceJson && voiceJson.generatedVoice && voiceJson.generatedVoice.audioFileDownloadUrl) {
     const audioRes = await fetch(voiceJson.generatedVoice.audioFileDownloadUrl);
     const buffer = Buffer.from(await audioRes.arrayBuffer());
-    return { buffer
+    return { buffer };
   }
   if (suppressError) {
     return {
package/lib/agents/tts_openai_agent.d.ts
CHANGED

@@ -1,16 +1,5 @@
 import type { AgentFunction, AgentFunctionInfo } from "graphai";
-
-
-    voice: string;
-    instructions: string;
-    suppressError: boolean;
-}, {
-    buffer?: Buffer;
-}, {
-    text: string;
-}, {
-    baseURL?: string;
-    apiKey?: string;
-}>;
+import type { OpenAITTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, OpenAIImageAgentConfig } from "../types/agent.js";
+export declare const ttsOpenaiAgent: AgentFunction<OpenAITTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, OpenAIImageAgentConfig>;
 declare const ttsOpenaiAgentInfo: AgentFunctionInfo;
 export default ttsOpenaiAgentInfo;
package/lib/agents/tts_openai_agent.js
CHANGED

@@ -1,14 +1,15 @@
 import { GraphAILogger } from "graphai";
 import OpenAI from "openai";
-
+import { provider2TTSAgent } from "../utils/provider2agent.js";
+export const ttsOpenaiAgent = async ({ namedInputs, params, config, }) => {
   const { text } = namedInputs;
   const { model, voice, suppressError, instructions } = params;
   const { apiKey, baseURL } = config ?? {};
   const openai = new OpenAI({ apiKey, baseURL });
   try {
     const tts_options = {
-      model: model ??
-      voice: voice ??
+      model: model ?? provider2TTSAgent.openai.defaultModel,
+      voice: voice ?? provider2TTSAgent.openai.defaultVoice,
       input: text,
     };
     if (instructions) {
package/lib/index.browser.d.ts
CHANGED
package/lib/index.browser.js
CHANGED
package/lib/index.js
CHANGED
package/lib/methods/mulmo_presentation_style.d.ts
CHANGED

@@ -8,6 +8,7 @@ export declare const MulmoPresentationStyleMethods: {
   getSpeechOptions(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeechOptions | undefined;
   getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
   getTTSProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
+  getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
   getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
   getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
   getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
package/lib/methods/mulmo_presentation_style.js
CHANGED

@@ -53,6 +53,10 @@ export const MulmoPresentationStyleMethods = {
     const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
     return speaker.provider ?? presentationStyle.speechParams.provider;
   },
+  getTTSModel(presentationStyle, beat) {
+    const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
+    return speaker.model ?? presentationStyle.speechParams.model;
+  },
   getVoiceId(presentationStyle, beat) {
     const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
     return speaker.voiceId;
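Note: getTTSModel resolves in the same order as getTTSProvider and getVoiceId: a per-speaker setting wins, otherwise the script-level speechParams.model applies, and both may be undefined so each agent falls back to its provider default. A hypothetical, simplified excerpt showing the override (the real schema lives in types/schema.d.ts; the model names are illustrative only):

const speechParams = {
  provider: "openai",
  model: "tts-1", // script-level default
  speakers: {
    Narrator: { voiceId: "shimmer" },                     // inherits "tts-1"
    Guest: { voiceId: "onyx", model: "gpt-4o-mini-tts" }, // per-speaker override
  },
};
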
package/lib/types/agent.d.ts
CHANGED
@@ -13,6 +13,15 @@ export type AgentBufferResult = {
 export type AgentPromptInputs = {
   prompt: string;
 };
+export type AgentTextInputs = {
+  text: string;
+};
+export type AgentErrorResult = {
+  error: unknown;
+};
+export type AgentConfig = {
+  apiKey?: string;
+};
 export type ImageAgentInputs = AgentPromptInputs;
 export type OpenAIImageAgentInputs = AgentPromptInputs & {
   referenceImages: string[] | null | undefined;
@@ -50,6 +59,24 @@ export type ReplicateMovieAgentParams = {
   duration?: number;
 };
 export type GoogleMovieAgentConfig = GoogleImageAgentConfig;
-export type ReplicateMovieAgentConfig =
-
+export type ReplicateMovieAgentConfig = AgentConfig;
+export type TTSAgentParams = {
+  suppressError: boolean;
+  voice: string;
+};
+export type OpenAITTSAgentParams = TTSAgentParams & {
+  instructions: string;
+  model: string;
+};
+export type NijivoiceTTSAgentParams = TTSAgentParams & {
+  speed: number;
+  speed_global: number;
+};
+export type GoogleTTSAgentParams = TTSAgentParams & {
+  speed: number;
+};
+export type ElevenlabsTTSAgentParams = TTSAgentParams & {
+  model: string;
+  stability: number;
+  similarityBoost: number;
 };
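Note: these shared aliases are what let each TTS agent's .d.ts collapse from an inline generic to a one-liner; AgentFunction's type parameters read as <Params, Result, Inputs, Config>. A sketch of wiring a hypothetical new agent against them:

import type { AgentFunction } from "graphai";
import type { TTSAgentParams, AgentBufferResult, AgentErrorResult, AgentTextInputs, AgentConfig } from "./agent.js";

// A made-up provider: extend the common params, reuse the shared result/input/config shapes.
type MyTTSAgentParams = TTSAgentParams & { pitch?: number };
export declare const ttsMyAgent: AgentFunction<MyTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
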
package/lib/types/agent.js
CHANGED