mulmocast 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.d.ts +0 -1
- package/lib/actions/audio.js +18 -13
- package/lib/actions/image_agents.d.ts +3 -12
- package/lib/actions/image_agents.js +12 -8
- package/lib/actions/images.js +3 -1
- package/lib/actions/movie.js +1 -3
- package/lib/actions/translate.js +13 -31
- package/lib/agents/image_openai_agent.js +4 -1
- package/lib/agents/lipsync_replicate_agent.js +10 -3
- package/lib/cli/commands/audio/handler.js +1 -1
- package/lib/cli/commands/image/handler.js +1 -1
- package/lib/cli/commands/movie/handler.js +1 -1
- package/lib/cli/commands/pdf/handler.js +1 -1
- package/lib/cli/helpers.d.ts +1 -4
- package/lib/cli/helpers.js +3 -2
- package/lib/mcp/server.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +5 -5
- package/lib/methods/mulmo_presentation_style.js +14 -8
- package/lib/methods/mulmo_script.js +4 -1
- package/lib/methods/mulmo_studio_context.d.ts +1 -0
- package/lib/methods/mulmo_studio_context.js +8 -0
- package/lib/types/agent.d.ts +4 -0
- package/lib/types/schema.d.ts +712 -8
- package/lib/types/schema.js +6 -2
- package/lib/types/type.d.ts +1 -1
- package/lib/utils/const.js +1 -1
- package/lib/utils/context.d.ts +401 -34
- package/lib/utils/context.js +95 -56
- package/lib/utils/file.d.ts +1 -1
- package/lib/utils/file.js +5 -2
- package/lib/utils/filters.d.ts +1 -0
- package/lib/utils/filters.js +8 -0
- package/lib/utils/preprocess.d.ts +15 -2
- package/lib/utils/preprocess.js +3 -3
- package/lib/utils/provider2agent.d.ts +3 -2
- package/lib/utils/provider2agent.js +20 -2
- package/lib/utils/string.d.ts +1 -1
- package/lib/utils/string.js +11 -8
- package/package.json +2 -1
- package/scripts/templates/image_refs.json +1 -0
- package/scripts/templates/voice_over.json +1 -0
- package/scripts/test/gpt.json +33 -0
- package/scripts/test/mulmo_story.json +11 -0
- package/scripts/test/test.json +64 -0
- package/scripts/test/test1.json +41 -0
- package/scripts/test/test2.json +66 -0
- package/scripts/test/test_audio.json +152 -0
- package/scripts/test/test_audio_instructions.json +70 -0
- package/scripts/test/test_beats.json +59 -0
- package/scripts/test/test_captions.json +53 -0
- package/scripts/test/test_elevenlabs_models.json +194 -0
- package/scripts/test/test_en.json +29 -0
- package/scripts/test/test_hello.json +18 -0
- package/scripts/test/test_hello_google.json +26 -0
- package/scripts/test/test_html.json +67 -0
- package/scripts/test/test_image_refs.json +50 -0
- package/scripts/test/test_images.json +49 -0
- package/scripts/test/test_lang.json +87 -0
- package/scripts/test/test_layout.json +153 -0
- package/scripts/test/test_lipsync.json +62 -0
- package/scripts/test/test_loop.json +35 -0
- package/scripts/test/test_media.json +245 -0
- package/scripts/test/test_mixed_providers.json +92 -0
- package/scripts/test/test_movie.json +40 -0
- package/scripts/test/test_no_audio.json +253 -0
- package/scripts/test/test_no_audio_with_credit.json +254 -0
- package/scripts/test/test_order.json +69 -0
- package/scripts/test/test_order_portrait.json +73 -0
- package/scripts/test/test_replicate.json +145 -0
- package/scripts/test/test_slideout_left_no_audio.json +46 -0
- package/scripts/test/test_sound_effect.json +41 -0
- package/scripts/test/test_spillover.json +117 -0
- package/scripts/test/test_transition.json +56 -0
- package/scripts/test/test_transition_no_audio.json +46 -0
- package/scripts/test/test_video_speed.json +81 -0
- package/scripts/test/test_voice_over.json +105 -0
- package/scripts/test/test_voices.json +55 -0
package/lib/actions/audio.d.ts
CHANGED
@@ -2,6 +2,5 @@ import "dotenv/config";
 import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
 export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
-export declare const audioFilePath: (context: MulmoStudioContext) => string;
 export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
 export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;

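The `audioFilePath` export is removed rather than renamed: per the `audio.js` and `movie.js` hunks below, `getAudioArtifactFilePath` now derives the artifact path from the context directly. A minimal compatibility sketch, assuming the deep import paths (they are not part of this diff):

    import type { MulmoStudioContext } from "mulmocast"; // import path assumed
    import { getAudioArtifactFilePath } from "mulmocast/lib/utils/file.js"; // import path assumed

    // Shim equivalent to the removed export (name hypothetical):
    export const audioFilePathCompat = (context: MulmoStudioContext): string =>
      getAudioArtifactFilePath(context);
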
package/lib/actions/audio.js
CHANGED
@@ -9,8 +9,8 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
 import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { MulmoPresentationStyleMethods } from "../methods/index.js";
-import { text2SpeechProviderSchema
-import { fileCacheAgentFilter } from "../utils/filters.js";
+import { text2SpeechProviderSchema } from "../types/index.js";
+import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
 import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
 import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
 import { provider2TTSAgent } from "../utils/provider2agent.js";
@@ -30,15 +30,15 @@ const getAudioPath = (context, beat, audioFile) => {
     }
     return audioFile;
 };
-const getAudioParam = (
-    const speaker = MulmoPresentationStyleMethods.getSpeaker(
+const getAudioParam = (context, beat) => {
+    const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
     const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
     const provider = text2SpeechProviderSchema.parse(speaker.provider);
     return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
 };
 export const getBeatAudioPath = (text, context, beat, lang) => {
     const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
-    const { voiceId, provider, speechOptions, model } = getAudioParam(context
+    const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
     const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
     const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
     const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
@@ -46,9 +46,9 @@ export const getBeatAudioPath = (text, context, beat, lang) => {
 };
 const preprocessor = (namedInputs) => {
     const { beat, studioBeat, multiLingual, context } = namedInputs;
-    const { lang
+    const { lang } = context;
     const text = localizedText(beat, multiLingual, lang);
-    const { voiceId, provider, speechOptions, model } = getAudioParam(
+    const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
     const audioPath = getBeatAudioPath(text, context, beat, lang);
     studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
     const needsTTS = !beat.audio && audioPath !== undefined;
@@ -58,6 +58,8 @@ const preprocessor = (namedInputs) => {
         voiceId,
         speechOptions,
         model,
+        provider,
+        lang,
         audioPath,
         studioBeat,
         needsTTS,
@@ -84,6 +86,8 @@ const graph_tts = {
         agent: ":preprocessor.ttsAgent",
         inputs: {
             text: ":preprocessor.text",
+            provider: ":preprocessor.provider",
+            lang: ":preprocessor.lang",
             cache: {
                 force: [":context.force"],
                 file: ":preprocessor.audioPath",
@@ -173,12 +177,12 @@ const agentFilters = [
         agent: fileCacheAgentFilter,
         nodeIds: ["tts"],
     },
+    {
+        name: "nijovoiceTextAgentFilter",
+        agent: nijovoiceTextAgentFilter,
+        nodeIds: ["tts"],
+    },
 ];
-export const audioFilePath = (context) => {
-    const fileName = MulmoStudioContextMethods.getFileName(context);
-    const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
-    return getAudioArtifactFilePath(outDirPath, fileName);
-};
 const getConcurrency = (context) => {
     // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
     const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
@@ -231,7 +235,7 @@ export const audio = async (context, settings, callbacks) => {
     const fileName = MulmoStudioContextMethods.getFileName(context);
     const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
     const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
-    const audioArtifactFilePath =
+    const audioArtifactFilePath = getAudioArtifactFilePath(context);
     const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
     const audioCombinedFilePath = getAudioFilePath(audioDirPath, fileName, fileName, context.lang);
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
@@ -253,6 +257,7 @@
     const result = await graph.run();
     writingMessage(audioCombinedFilePath);
     MulmoStudioContextMethods.setSessionState(context, "audio", false);
+    writingMessage(audioArtifactFilePath);
     return result.combineFiles;
 }
 catch (__error) {

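The release registers a second agent filter, `nijovoiceTextAgentFilter`, on the same `tts` node, and the preprocessor now forwards `provider` and `lang` so the filter can act only on Nijivoice beats. The real filter is the +8-line change in `lib/utils/filters.js` and is not shown in this diff; the sketch below only illustrates the usual GraphAI agent-filter shape (compare `localizedTextCacheAgentFilter` in `translate.js`), with a placeholder transformation:

    // Hypothetical sketch only: the actual text transformation is not shown in this diff.
    const nijovoiceTextAgentFilterSketch = async (context: any, next: (ctx: any) => Promise<unknown>) => {
      const { provider, lang, text } = context.namedInputs;
      if (provider === "nijivoice" && lang === "ja" && typeof text === "string") {
        context.namedInputs.text = text.normalize("NFKC"); // placeholder transformation
      }
      return await next(context);
    };
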
package/lib/actions/image_agents.d.ts
CHANGED
@@ -23,10 +23,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
     };
     lipSyncFile?: string;
     lipSyncModel?: string;
-
-        agentName: string;
-        defaultModel: string;
-    };
+    lipSyncAgentName?: string;
     audioFile?: string;
     beatDuration?: number;
     htmlPrompt?: undefined;
@@ -61,10 +58,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
     };
     lipSyncFile?: string;
     lipSyncModel?: string;
-
-        agentName: string;
-        defaultModel: string;
-    };
+    lipSyncAgentName?: string;
     audioFile?: string;
     beatDuration?: number;
     htmlPrompt?: undefined;
@@ -102,10 +96,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
     };
     lipSyncFile?: string;
     lipSyncModel?: string;
-
-        agentName: string;
-        defaultModel: string;
-    };
+    lipSyncAgentName?: string;
     audioFile?: string;
     beatDuration?: number;
     htmlPrompt?: undefined;

package/lib/actions/image_agents.js
CHANGED
@@ -25,16 +25,20 @@ export const imagePreprocessAgent = async (namedInputs) => {
         movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
         beatDuration: beat.duration ?? studioBeat?.duration,
     };
-
-
-
-
-
-
+    const isMovie = Boolean(beat.moviePrompt || beat?.image?.type === "movie");
+    if (isMovie) {
+        if (beat.soundEffectPrompt) {
+            returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
+            returnValue.soundEffectModel =
+                beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
+            returnValue.soundEffectFile = moviePaths.soundEffectFile;
+            returnValue.soundEffectPrompt = beat.soundEffectPrompt;
+        }
     }
     if (beat.enableLipSync) {
-
-        returnValue.
+        const lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
+        returnValue.lipSyncAgentName = lipSyncAgentInfo.agentName;
+        returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? lipSyncAgentInfo.defaultModel;
         returnValue.lipSyncFile = moviePaths.lipSyncFile;
         // Audio file will be set from the beat's audio file when available
         returnValue.audioFile = studioBeat?.audioFile;

package/lib/actions/images.js
CHANGED
@@ -135,6 +135,7 @@ const beat_graph_data = {
             model: ":preprocessor.imageParams.model",
             moderation: ":preprocessor.imageParams.moderation",
             canvasSize: ":context.presentationStyle.canvasSize",
+            quality: ":preprocessor.imageParams.quality",
         },
     },
     defaultValue: {},
@@ -217,10 +218,11 @@
     },
     lipSyncGenerator: {
         if: ":beat.enableLipSync",
-        agent: ":preprocessor.
+        agent: ":preprocessor.lipSyncAgentName",
         inputs: {
             onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
             movieFile: ":preprocessor.movieFile",
+            imageFile: ":preprocessor.referenceImageForMovie",
             audioFile: ":preprocessor.audioFile",
             lipSyncFile: ":preprocessor.lipSyncFile",
             params: {

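Note that `agent:` takes a data reference here, not a literal name: GraphAI resolves `":preprocessor.lipSyncAgentName"` per beat at run time, the same pattern `audio.js` uses with `":preprocessor.ttsAgent"`. A trimmed sketch of the node shape:

    // Trimmed illustration; the full node also wires lipSyncFile, params, etc.
    const lipSyncGenerator = {
      if: ":beat.enableLipSync",
      agent: ":preprocessor.lipSyncAgentName", // resolved per beat, e.g. a Replicate lipsync agent
      inputs: {
        movieFile: ":preprocessor.movieFile",
        imageFile: ":preprocessor.referenceImageForMovie", // new: still-image input
        audioFile: ":preprocessor.audioFile",
      },
    };
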
package/lib/actions/movie.js
CHANGED
@@ -246,9 +246,7 @@ export const movieFilePath = (context) => {
 export const movie = async (context) => {
     MulmoStudioContextMethods.setSessionState(context, "video", true);
     try {
-        const
-        const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
-        const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
+        const audioArtifactFilePath = getAudioArtifactFilePath(context);
         const outputVideoPath = movieFilePath(context);
         if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
             writingMessage(outputVideoPath);

package/lib/actions/translate.js
CHANGED
@@ -1,9 +1,9 @@
 import "dotenv/config";
-import { GraphAI, assert } from "graphai";
+import { GraphAI, assert, isNull } from "graphai";
 import * as agents from "@graphai/vanilla";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-import { recursiveSplitJa
+import { recursiveSplitJa } from "../utils/string.js";
 import { settings2GraphAIConfig } from "../utils/utils.js";
 import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
 import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
@@ -13,17 +13,9 @@ const translateGraph = {
     version: 0.5,
     nodes: {
         context: {},
-        defaultLang: {},
         outDirPath: {},
         outputMultilingualFilePath: {},
-
-            agent: "stringUpdateTextAgent",
-            inputs: {
-                newText: ":context.studio.script.lang",
-                oldText: ":defaultLang",
-            },
-        },
-        targetLangs: {}, // TODO
+        targetLangs: {},
         mergeStudioResult: {
             isResult: true,
             agent: "mergeObjectAgent",
@@ -37,7 +29,6 @@
                 targetLangs: ":targetLangs",
                 context: ":context",
                 rows: ":context.studio.script.beats",
-                lang: ":lang",
             },
             params: {
                 rowKey: "beat",
@@ -62,7 +53,7 @@
                 beat: ":beat",
                 multiLingual: ":multiLingual",
                 rows: ":targetLangs",
-                lang: ":lang
+                lang: ":context.studio.script.lang",
                 context: ":context",
                 beatIndex: ":__mapIndex",
             },
@@ -120,17 +111,11 @@
     },
     ttsTexts: {
         agent: (namedInputs) => {
-            const { localizedText
+            const { localizedText } = namedInputs;
             // cache
             if (localizedText.ttsTexts) {
                 return localizedText;
             }
-            if (targetLang === "ja") {
-                return {
-                    ...localizedText,
-                    ttsTexts: localizedText?.texts?.map((text) => replacePairsJa(text, replacementsJa)),
-                };
-            }
             return {
                 ...localizedText,
                 ttsTexts: localizedText.texts,
@@ -180,18 +165,14 @@ const localizedTextCacheAgentFilter = async (context, next) => {
     if (!beat.text) {
         return { text: "" };
     }
-    // The original text is unchanged and the target language text is present
-    if (multiLingual.multiLingualTexts &&
-        multiLingual.multiLingualTexts[lang] &&
-        multiLingual.multiLingualTexts[lang].text === beat.text &&
-        multiLingual.multiLingualTexts[targetLang] &&
-        multiLingual.multiLingualTexts[targetLang].text) {
-        return { text: multiLingual.multiLingualTexts[targetLang].text };
-    }
     // same language
     if (targetLang === lang) {
         return { text: beat.text };
     }
+    // The original text is unchanged and the target language text is present
+    if (multiLingual.multiLingualTexts?.[lang]?.text === beat.text && multiLingual.multiLingualTexts[targetLang]?.text) {
+        return { text: multiLingual.multiLingualTexts[targetLang].text };
+    }
     try {
         MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
         return await next(context);
@@ -207,8 +188,6 @@ const agentFilters = [
         nodeIds: ["localizedTexts"],
     },
 ];
-const defaultLang = "en";
-const targetLangs = ["ja", "en"];
 export const translate = async (context, args) => {
     const { settings, callbacks } = args ?? {};
     try {
@@ -217,11 +196,14 @@ export const translate = async (context, args) => {
         const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
         const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
         mkdir(outDirPath);
+        const langs = (context.multiLingual ?? []).map((x) => Object.keys(x.multiLingualTexts)).flat(); // existing langs in multiLingual
+        const targetLangs = [
+            ...new Set([context.studio.script.lang, langs, context.lang, context.studio.script.captionParams?.lang].flat().filter((x) => !isNull(x))),
+        ];
         const config = settings2GraphAIConfig(settings, process.env);
         assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
         const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
         graph.injectValue("context", context);
-        graph.injectValue("defaultLang", defaultLang);
         graph.injectValue("targetLangs", targetLangs);
         graph.injectValue("outDirPath", outDirPath);
         graph.injectValue("outputMultilingualFilePath", outputMultilingualFilePath);

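The hard-coded `defaultLang`/`targetLangs` pair is gone; the target set is now computed from the script language, languages already present in `multiLingual`, the context language, and the caption language. A worked example with illustrative values (`x != null` standing in for graphai's `isNull`):

    const scriptLang = "en";
    const existingLangs = ["ja", "de"]; // keys already present in multiLingual
    const contextLang = "ja";
    const captionLang = "fr";

    const targetLangs = [
      ...new Set([scriptLang, existingLangs, contextLang, captionLang].flat().filter((x) => x != null)),
    ];
    // => ["en", "ja", "de", "fr"] – deduplicated, insertion order preserved
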
package/lib/agents/image_openai_agent.js
CHANGED
@@ -6,7 +6,7 @@ import { provider2ImageAgent } from "../utils/provider2agent.js";
 // https://platform.openai.com/docs/guides/image-generation
 export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
     const { prompt, referenceImages } = namedInputs;
-    const { moderation, canvasSize } = params;
+    const { moderation, canvasSize, quality } = params;
     const { apiKey, baseURL } = { ...config };
     const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
     const openai = new OpenAI({ apiKey, baseURL });
@@ -42,6 +42,9 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
     };
     if (model === "gpt-image-1") {
         imageOptions.moderation = moderation || "auto";
+        if (quality) {
+            imageOptions.quality = quality;
+        }
     }
     const response = await (async () => {
         try {

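`quality` is forwarded only for `gpt-image-1` (other models do not receive it). A minimal sketch of the resulting Images API request via the OpenAI Node SDK; prompt and key handling are illustrative:

    import OpenAI from "openai";

    const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
    const response = await openai.images.generate({
      model: "gpt-image-1",
      prompt: "a watercolor lighthouse at dusk",
      n: 1,
      size: "1024x1024",
      moderation: "auto",
      quality: "high", // "low" | "medium" | "high" | "auto"; omitted when not configured
    });
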
package/lib/agents/lipsync_replicate_agent.js
CHANGED
@@ -3,7 +3,7 @@ import { GraphAILogger } from "graphai";
 import Replicate from "replicate";
 import { provider2LipSyncAgent } from "../utils/provider2agent.js";
 export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) => {
-    const { movieFile, audioFile } = namedInputs;
+    const { movieFile, audioFile, imageFile } = namedInputs;
     const apiKey = config?.apiKey;
     const model = params.model ?? provider2LipSyncAgent.replicate.defaultModel;
     if (!apiKey) {
@@ -12,10 +12,12 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
     const replicate = new Replicate({
         auth: apiKey,
     });
-    const videoBuffer = readFileSync(movieFile);
+    const videoBuffer = movieFile ? readFileSync(movieFile) : undefined;
     const audioBuffer = readFileSync(audioFile);
-    const
+    const imageBuffer = imageFile ? readFileSync(imageFile) : undefined;
+    const videoUri = videoBuffer ? `data:video/quicktime;base64,${videoBuffer.toString("base64")}` : undefined;
     const audioUri = `data:audio/wav;base64,${audioBuffer.toString("base64")}`;
+    const imageUri = imageBuffer ? `data:image/png;base64,${imageBuffer.toString("base64")}` : undefined;
     const input = {
         video: undefined,
         video_input: undefined,
@@ -23,6 +25,7 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
         audio: undefined,
         audio_input: undefined,
         audio_file: undefined,
+        image: undefined,
     };
     const modelParams = provider2LipSyncAgent.replicate.modelParams[model];
     if (!modelParams) {
@@ -30,12 +33,16 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
     }
     const videoParam = modelParams.video;
     const audioParam = modelParams.audio;
+    const imageParam = modelParams.image;
     if (videoParam === "video" || videoParam === "video_input" || videoParam === "video_url") {
         input[videoParam] = videoUri;
     }
     if (audioParam === "audio" || audioParam === "audio_input" || audioParam === "audio_file") {
         input[audioParam] = audioUri;
     }
+    if (imageParam === "image") {
+        input[imageParam] = imageUri;
+    }
     const model_identifier = provider2LipSyncAgent.replicate.modelParams[model]?.identifier ?? model;
     try {
         const output = await replicate.run(model_identifier, {

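All three media inputs are now read conditionally and encoded as base64 data URIs, and each Replicate model's `modelParams` entry names the fields it accepts (now including `image` for still-image-driven lipsync). A small helper capturing the read-and-encode pattern; the helper name is ours, not the package's:

    import { readFileSync } from "node:fs";

    const toDataUri = (file: string | undefined, mime: string): string | undefined =>
      file ? `data:${mime};base64,${readFileSync(file).toString("base64")}` : undefined;

    // Image-driven lipsync: no source movie, just a still frame plus audio.
    const videoUri = toDataUri(undefined, "video/quicktime"); // undefined
    const imageUri = toDataUri("beat.png", "image/png");
    const audioUri = toDataUri("beat.wav", "audio/wav");
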
package/lib/cli/helpers.d.ts
CHANGED
@@ -1,9 +1,6 @@
 import type { CliArgs } from "../types/cli_types.js";
 import { FileObject, InitOptions, MulmoStudioContext } from "../types/index.js";
-export declare const runTranslateIfNeeded: (context: MulmoStudioContext,
-    l?: string;
-    c?: string;
-}) => Promise<void>;
+export declare const runTranslateIfNeeded: (context: MulmoStudioContext, includeCaption?: boolean) => Promise<void>;
 export declare const setGraphAILogger: (verbose: boolean | undefined, logValues?: Record<string, unknown>) => void;
 export declare const getFileObject: (args: {
     basedir?: string;

package/lib/cli/helpers.js
CHANGED
@@ -5,10 +5,11 @@ import clipboardy from "clipboardy";
 import { getBaseDirPath, getFullPath, getOutputStudioFilePath, resolveDirPath, mkdir, getOutputMultilingualFilePath, generateTimestampedFileName, } from "../utils/file.js";
 import { isHttp } from "../utils/utils.js";
 import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { translate } from "../actions/translate.js";
 import { initializeContextFromFiles } from "../utils/context.js";
-export const runTranslateIfNeeded = async (context,
-    if (
+export const runTranslateIfNeeded = async (context, includeCaption = false) => {
+    if (MulmoStudioContextMethods.needTranslate(context, includeCaption)) {
         GraphAILogger.log("run translate");
         await translate(context);
     }

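The old `{ l?, c? }` options object becomes a single boolean, with the language check centralized in `MulmoStudioContextMethods.needTranslate`. New call shapes (import paths assumed):

    import type { MulmoStudioContext } from "mulmocast"; // import path assumed
    import { runTranslateIfNeeded } from "mulmocast/lib/cli/helpers.js"; // import path assumed

    const example = async (context: MulmoStudioContext) => {
      await runTranslateIfNeeded(context);       // translate only if context.lang differs from the script lang
      await runTranslateIfNeeded(context, true); // also translate when captionParams.lang differs
    };
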
package/lib/mcp/server.js
CHANGED
@@ -104,7 +104,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
     throw new Error("Failed to initialize context from MulmoScript");
 }
 // Run translation if needed
-await runTranslateIfNeeded(context
+await runTranslateIfNeeded(context);
 // Execute the requested command
 switch (cmd) {
     case "movie":

package/lib/methods/mulmo_presentation_style.d.ts
CHANGED
@@ -1,12 +1,11 @@
 import "dotenv/config";
-import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
+import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
 export declare const MulmoPresentationStyleMethods: {
     getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
     getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
     getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
     getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
-    getSpeaker(
-    getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
+    getSpeaker(context: MulmoStudioContext, beat: MulmoBeat): SpeakerData;
     getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
     getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
     getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -37,9 +36,10 @@ export declare const MulmoPresentationStyleMethods: {
         defaultModel: import("../utils/provider2agent.js").ReplicateModel;
         models: import("../utils/provider2agent.js").ReplicateModel[];
         modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
-            identifier?: `${string}/${string}:${string}`;
-            video
+            identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
+            video?: string;
             audio: string;
+            image?: string;
         }>;
     };
     getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;

package/lib/methods/mulmo_presentation_style.js
CHANGED
@@ -46,18 +46,24 @@ export const MulmoPresentationStyleMethods = {
     }
     return keys[0];
 },
-getSpeaker(
-    userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
-    const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
-
-    const speaker = presentationStyle.speechParams.speakers[speakerId];
+getSpeaker(context, beat) {
+    userAssert(!!context.presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
+    const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(context.presentationStyle);
+    const speaker = context.presentationStyle.speechParams.speakers[speakerId];
     userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
+    // Check if the speaker has a language-specific version
+    const lang = context.lang ?? context.studio.script.lang;
+    if (speaker.lang && lang && speaker.lang[lang]) {
+        return speaker.lang[lang];
+    }
     return speaker;
 },
-
-
-
+/* NOTE: This method is not used.
+getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
+    const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
+    return speaker.model;
 },
+*/
 getText2ImageProvider(provider) {
     return text2ImageProviderSchema.parse(provider);
 },

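`getSpeaker` now takes the whole context so it can return a language-specific speaker variant: if the resolved speaker carries a `lang` map and `context.lang ?? script.lang` matches one of its keys, that entry wins. An illustrative `speechParams` fragment (voice ids made up):

    const speechParams = {
      speakers: {
        host: {
          provider: "openai",
          voiceId: "shimmer",
          lang: {
            // with context.lang === "ja", getSpeaker returns this variant
            ja: { provider: "nijivoice", voiceId: "ja-voice-id-0000" },
          },
        },
      },
    };
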
package/lib/methods/mulmo_script.js
CHANGED
@@ -18,6 +18,9 @@ const validators = [{ from: "1.0", to: "1.1", validator: validate_1_0 }];
 export const MulmoScriptMethods = {
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     validate(script) {
+        const version = script.$mulmocast.version;
+        // lang was optional in 1.0 and 1.1
+        const defaultLang = version === "1.0" || version === "1.1" ? { lang: "en" } : {};
         const validatedScript = validators.reduce((acc, validator) => {
             if (acc.$mulmocast.version === validator.from) {
                 const validated = validator.validator(acc);
@@ -25,7 +28,7 @@ export const MulmoScriptMethods = {
                 return validated;
             }
             return acc;
-        }, script);
+        }, { ...defaultLang, ...script });
         return mulmoScriptSchema.parse(validatedScript);
     },
 };

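Because the backfill is spread first, an explicit `lang` in the script always wins; only 1.0/1.1 scripts that omit `lang` pick up `"en"`. A two-line check:

    const defaultLang = { lang: "en" };
    const merged1 = { ...defaultLang, ...{ $mulmocast: { version: "1.1" }, lang: "ja" } }; // lang: "ja"
    const merged2 = { ...defaultLang, ...{ $mulmocast: { version: "1.0" } } };             // lang: "en"
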
package/lib/methods/mulmo_studio_context.d.ts
CHANGED
@@ -11,4 +11,5 @@ export declare const MulmoStudioContextMethods: {
     getCaption(context: MulmoStudioContext): string | undefined;
     setSessionState(context: MulmoStudioContext, sessionType: SessionType, value: boolean): void;
     setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, value: boolean): void;
+    needTranslate(context: MulmoStudioContext, includeCaption?: boolean): boolean | "" | undefined;
 };

package/lib/methods/mulmo_studio_context.js
CHANGED
@@ -63,4 +63,12 @@ export const MulmoStudioContextMethods = {
         }
         notifyBeatStateChange(context, sessionType, index);
     },
+    needTranslate(context, includeCaption = false) {
+        // context.studio.script.lang = defaultLang, context.lang = targetLanguage.
+        if (includeCaption) {
+            return (context.studio.script.lang !== context.lang ||
+                (context.studio.script.captionParams?.lang && context.studio.script.lang !== context.studio.script.captionParams?.lang));
+        }
+        return context.studio.script.lang !== context.lang;
+    },
 };

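The `boolean | "" | undefined` return type in the declaration above falls out of the `&&` short-circuit: when `captionParams?.lang` is `""` or `undefined`, that value itself is returned. A standalone mirror of the logic with sample inputs:

    type Ctx = { lang?: string; studio: { script: { lang?: string; captionParams?: { lang?: string } } } };

    const needTranslate = (context: Ctx, includeCaption = false) => {
      if (includeCaption) {
        return (
          context.studio.script.lang !== context.lang ||
          (context.studio.script.captionParams?.lang && context.studio.script.lang !== context.studio.script.captionParams?.lang)
        );
      }
      return context.studio.script.lang !== context.lang;
    };

    needTranslate({ lang: "ja", studio: { script: { lang: "en" } } });        // true – target differs
    needTranslate({ lang: "en", studio: { script: { lang: "en" } } });        // false
    needTranslate({ lang: "en", studio: { script: { lang: "en", captionParams: { lang: "fr" } } } }, true); // true via the caption check
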
package/lib/types/agent.d.ts
CHANGED
@@ -1,11 +1,13 @@
 export type OpenAIImageSize = "1792x1024" | "1024x1792" | "1024x1024" | "1536x1024" | "1024x1536";
 export type OpenAIImageModeration = "low" | "auto";
+export type OpenAIImageQuality = "low" | "medium" | "high" | "auto";
 export type OpenAIImageOptions = {
     model: string;
     prompt: string;
     n: number;
     size: OpenAIImageSize;
     moderation?: OpenAIImageModeration;
+    quality?: OpenAIImageQuality;
 };
 export type AgentBufferResult = {
     buffer: Buffer;
@@ -35,6 +37,7 @@ export type ImageAgentParams = {
 };
 export type OpenAIImageAgentParams = ImageAgentParams & {
     moderation: OpenAIImageModeration | null | undefined;
+    quality?: OpenAIImageQuality;
 };
 export type OpenAIImageAgentConfig = {
     baseURL?: string;
@@ -74,6 +77,7 @@ export type LipSyncAgentInputs = {
     lipSyncFile: string;
     movieFile: string;
     audioFile: string;
+    imageFile: string;
 };
 export type GoogleMovieAgentConfig = GoogleImageAgentConfig;
 export type ReplicateMovieAgentConfig = AgentConfig;

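
A quick conformance check against the updated types, assuming the import path (the package's actual export map may differ):

    import type { OpenAIImageOptions, LipSyncAgentInputs } from "mulmocast/lib/types/agent.js"; // path assumed

    const imageOptions: OpenAIImageOptions = {
      model: "gpt-image-1",
      prompt: "a red torii gate in the rain",
      n: 1,
      size: "1024x1024",
      moderation: "auto",
      quality: "medium", // new optional field
    };

    const lipSyncInputs: LipSyncAgentInputs = {
      lipSyncFile: "out/lipsync.mp4",
      movieFile: "out/movie.mp4",
      audioFile: "out/audio.wav",
      imageFile: "out/frame.png", // new field on this type
    };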
|