mulmocast 1.1.5 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +10 -1
- package/lib/actions/image_agents.d.ts +3 -12
- package/lib/actions/image_agents.js +12 -8
- package/lib/actions/images.js +2 -1
- package/lib/actions/translate.d.ts +51 -2
- package/lib/actions/translate.js +193 -148
- package/lib/agents/combine_audio_files_agent.js +1 -1
- package/lib/agents/lipsync_replicate_agent.js +10 -3
- package/lib/agents/tts_nijivoice_agent.js +1 -1
- package/lib/cli/commands/audio/handler.js +1 -1
- package/lib/cli/commands/image/handler.js +1 -1
- package/lib/cli/commands/movie/handler.js +1 -1
- package/lib/cli/commands/pdf/handler.js +1 -1
- package/lib/cli/helpers.d.ts +1 -4
- package/lib/cli/helpers.js +3 -2
- package/lib/index.common.d.ts +1 -0
- package/lib/index.common.js +1 -0
- package/lib/mcp/server.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +3 -2
- package/lib/methods/mulmo_script.d.ts +4 -1
- package/lib/methods/mulmo_script.js +18 -2
- package/lib/methods/mulmo_studio_context.d.ts +1 -0
- package/lib/methods/mulmo_studio_context.js +8 -0
- package/lib/types/agent.d.ts +1 -0
- package/lib/types/schema.d.ts +326 -230
- package/lib/types/schema.js +10 -3
- package/lib/types/type.d.ts +3 -2
- package/lib/utils/const.d.ts +1 -0
- package/lib/utils/const.js +2 -1
- package/lib/utils/context.d.ts +393 -50
- package/lib/utils/context.js +90 -57
- package/lib/utils/filters.d.ts +1 -0
- package/lib/utils/filters.js +8 -0
- package/lib/utils/image_plugins/mermaid.js +1 -1
- package/lib/utils/image_plugins/source.js +1 -1
- package/lib/utils/preprocess.d.ts +2 -2
- package/lib/utils/preprocess.js +3 -3
- package/lib/utils/provider2agent.d.ts +3 -2
- package/lib/utils/provider2agent.js +20 -2
- package/lib/utils/string.d.ts +1 -1
- package/lib/utils/string.js +12 -8
- package/lib/utils/utils.js +2 -6
- package/package.json +2 -2
- package/scripts/templates/image_refs.json +1 -0
- package/scripts/templates/voice_over.json +1 -0
- package/scripts/test/gpt.json +1 -0
- package/scripts/test/test1.json +1 -0
- package/scripts/test/test_audio.json +1 -0
- package/scripts/test/test_audio_instructions.json +1 -0
- package/scripts/test/test_beats.json +1 -0
- package/scripts/test/test_captions.json +1 -0
- package/scripts/test/test_elevenlabs_models.json +1 -0
- package/scripts/test/test_hello.json +1 -0
- package/scripts/test/test_hello_google.json +1 -0
- package/scripts/test/test_html.json +1 -0
- package/scripts/test/test_image_refs.json +1 -0
- package/scripts/test/test_images.json +1 -0
- package/scripts/test/test_lang.json +58 -2
- package/scripts/test/test_layout.json +1 -0
- package/scripts/test/test_lipsync.json +9 -0
- package/scripts/test/test_loop.json +1 -0
- package/scripts/test/test_media.json +1 -0
- package/scripts/test/test_mixed_providers.json +1 -0
- package/scripts/test/test_movie.json +1 -0
- package/scripts/test/test_no_audio.json +1 -0
- package/scripts/test/test_no_audio_with_credit.json +1 -0
- package/scripts/test/test_order.json +1 -0
- package/scripts/test/test_order_portrait.json +1 -0
- package/scripts/test/test_replicate.json +19 -0
- package/scripts/test/test_slideout_left_no_audio.json +1 -0
- package/scripts/test/test_spillover.json +1 -0
- package/scripts/test/test_transition.json +1 -0
- package/scripts/test/test_transition_no_audio.json +1 -0
- package/scripts/test/test_video_speed.json +1 -0
- package/scripts/test/test_voice_over.json +1 -0
- package/scripts/test/test_voices.json +1 -0
- package/scripts/templates/image_prompt_only_template.ts +0 -95
package/lib/actions/audio.js
CHANGED
|
@@ -10,7 +10,7 @@ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
|
|
|
10
10
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
11
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
12
12
|
import { text2SpeechProviderSchema } from "../types/index.js";
|
|
13
|
-
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
+
import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
|
|
14
14
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
15
15
|
import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
|
|
16
16
|
import { provider2TTSAgent } from "../utils/provider2agent.js";
|
|
@@ -58,6 +58,8 @@ const preprocessor = (namedInputs) => {
|
|
|
58
58
|
voiceId,
|
|
59
59
|
speechOptions,
|
|
60
60
|
model,
|
|
61
|
+
provider,
|
|
62
|
+
lang,
|
|
61
63
|
audioPath,
|
|
62
64
|
studioBeat,
|
|
63
65
|
needsTTS,
|
|
@@ -84,6 +86,8 @@ const graph_tts = {
|
|
|
84
86
|
agent: ":preprocessor.ttsAgent",
|
|
85
87
|
inputs: {
|
|
86
88
|
text: ":preprocessor.text",
|
|
89
|
+
provider: ":preprocessor.provider",
|
|
90
|
+
lang: ":preprocessor.lang",
|
|
87
91
|
cache: {
|
|
88
92
|
force: [":context.force"],
|
|
89
93
|
file: ":preprocessor.audioPath",
|
|
@@ -173,6 +177,11 @@ const agentFilters = [
|
|
|
173
177
|
agent: fileCacheAgentFilter,
|
|
174
178
|
nodeIds: ["tts"],
|
|
175
179
|
},
|
|
180
|
+
{
|
|
181
|
+
name: "nijovoiceTextAgentFilter",
|
|
182
|
+
agent: nijovoiceTextAgentFilter,
|
|
183
|
+
nodeIds: ["tts"],
|
|
184
|
+
},
|
|
176
185
|
];
|
|
177
186
|
const getConcurrency = (context) => {
|
|
178
187
|
// Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
|
|
@@ -23,10 +23,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
23
23
|
};
|
|
24
24
|
lipSyncFile?: string;
|
|
25
25
|
lipSyncModel?: string;
|
|
26
|
-
|
|
27
|
-
agentName: string;
|
|
28
|
-
defaultModel: string;
|
|
29
|
-
};
|
|
26
|
+
lipSyncAgentName?: string;
|
|
30
27
|
audioFile?: string;
|
|
31
28
|
beatDuration?: number;
|
|
32
29
|
htmlPrompt?: undefined;
|
|
@@ -61,10 +58,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
61
58
|
};
|
|
62
59
|
lipSyncFile?: string;
|
|
63
60
|
lipSyncModel?: string;
|
|
64
|
-
|
|
65
|
-
agentName: string;
|
|
66
|
-
defaultModel: string;
|
|
67
|
-
};
|
|
61
|
+
lipSyncAgentName?: string;
|
|
68
62
|
audioFile?: string;
|
|
69
63
|
beatDuration?: number;
|
|
70
64
|
htmlPrompt?: undefined;
|
|
@@ -102,10 +96,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
102
96
|
};
|
|
103
97
|
lipSyncFile?: string;
|
|
104
98
|
lipSyncModel?: string;
|
|
105
|
-
|
|
106
|
-
agentName: string;
|
|
107
|
-
defaultModel: string;
|
|
108
|
-
};
|
|
99
|
+
lipSyncAgentName?: string;
|
|
109
100
|
audioFile?: string;
|
|
110
101
|
beatDuration?: number;
|
|
111
102
|
htmlPrompt?: undefined;
|
|
@@ -25,16 +25,20 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
25
25
|
movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
|
|
26
26
|
beatDuration: beat.duration ?? studioBeat?.duration,
|
|
27
27
|
};
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
const isMovie = Boolean(beat.moviePrompt || beat?.image?.type === "movie");
|
|
29
|
+
if (isMovie) {
|
|
30
|
+
if (beat.soundEffectPrompt) {
|
|
31
|
+
returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
|
|
32
|
+
returnValue.soundEffectModel =
|
|
33
|
+
beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
|
|
34
|
+
returnValue.soundEffectFile = moviePaths.soundEffectFile;
|
|
35
|
+
returnValue.soundEffectPrompt = beat.soundEffectPrompt;
|
|
36
|
+
}
|
|
34
37
|
}
|
|
35
38
|
if (beat.enableLipSync) {
|
|
36
|
-
|
|
37
|
-
returnValue.
|
|
39
|
+
const lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
|
|
40
|
+
returnValue.lipSyncAgentName = lipSyncAgentInfo.agentName;
|
|
41
|
+
returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? lipSyncAgentInfo.defaultModel;
|
|
38
42
|
returnValue.lipSyncFile = moviePaths.lipSyncFile;
|
|
39
43
|
// Audio file will be set from the beat's audio file when available
|
|
40
44
|
returnValue.audioFile = studioBeat?.audioFile;
|
package/lib/actions/images.js
CHANGED
|
@@ -218,10 +218,11 @@ const beat_graph_data = {
|
|
|
218
218
|
},
|
|
219
219
|
lipSyncGenerator: {
|
|
220
220
|
if: ":beat.enableLipSync",
|
|
221
|
-
agent: ":preprocessor.
|
|
221
|
+
agent: ":preprocessor.lipSyncAgentName",
|
|
222
222
|
inputs: {
|
|
223
223
|
onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
|
|
224
224
|
movieFile: ":preprocessor.movieFile",
|
|
225
|
+
imageFile: ":preprocessor.referenceImageForMovie",
|
|
225
226
|
audioFile: ":preprocessor.audioFile",
|
|
226
227
|
lipSyncFile: ":preprocessor.lipSyncFile",
|
|
227
228
|
params: {
|
|
@@ -1,7 +1,56 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import type { CallbackFunction } from "graphai";
|
|
3
|
-
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
import { LANG, LocalizedText, MulmoStudioContext } from "../types/index.js";
|
|
4
|
+
export declare const translateTextGraph: {
|
|
5
|
+
version: number;
|
|
6
|
+
nodes: {
|
|
7
|
+
localizedText: {
|
|
8
|
+
inputs: {
|
|
9
|
+
targetLang: string;
|
|
10
|
+
beat: string;
|
|
11
|
+
multiLingual: string;
|
|
12
|
+
lang: string;
|
|
13
|
+
beatIndex: string;
|
|
14
|
+
mulmoContext: string;
|
|
15
|
+
system: string;
|
|
16
|
+
prompt: string[];
|
|
17
|
+
};
|
|
18
|
+
passThrough: {
|
|
19
|
+
lang: string;
|
|
20
|
+
};
|
|
21
|
+
output: {
|
|
22
|
+
text: string;
|
|
23
|
+
};
|
|
24
|
+
agent: string;
|
|
25
|
+
};
|
|
26
|
+
splitText: {
|
|
27
|
+
agent: (namedInputs: {
|
|
28
|
+
localizedText: LocalizedText;
|
|
29
|
+
targetLang: LANG;
|
|
30
|
+
}) => string[];
|
|
31
|
+
inputs: {
|
|
32
|
+
targetLang: string;
|
|
33
|
+
localizedText: string;
|
|
34
|
+
};
|
|
35
|
+
};
|
|
36
|
+
textTranslateResult: {
|
|
37
|
+
isResult: boolean;
|
|
38
|
+
agent: string;
|
|
39
|
+
inputs: {
|
|
40
|
+
lang: string;
|
|
41
|
+
text: string;
|
|
42
|
+
texts: string;
|
|
43
|
+
ttsTexts: string;
|
|
44
|
+
cacheKey: string;
|
|
45
|
+
};
|
|
46
|
+
};
|
|
47
|
+
};
|
|
48
|
+
};
|
|
49
|
+
export declare const translateBeat: (index: number, context: MulmoStudioContext, targetLangs: string[], args?: {
|
|
50
|
+
settings?: Record<string, string>;
|
|
51
|
+
callbacks?: CallbackFunction[];
|
|
52
|
+
}) => Promise<void>;
|
|
4
53
|
export declare const translate: (context: MulmoStudioContext, args?: {
|
|
5
54
|
callbacks?: CallbackFunction[];
|
|
6
55
|
settings?: Record<string, string>;
|
|
7
|
-
}) => Promise<
|
|
56
|
+
}) => Promise<MulmoStudioContext>;
|
package/lib/actions/translate.js
CHANGED
|
@@ -1,34 +1,165 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import {
|
|
2
|
+
import { createHash } from "crypto";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import { GraphAI, assert, isNull, GraphAILogger } from "graphai";
|
|
3
5
|
import * as agents from "@graphai/vanilla";
|
|
4
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
5
7
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
6
|
-
import { recursiveSplitJa
|
|
8
|
+
import { recursiveSplitJa } from "../utils/string.js";
|
|
7
9
|
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
10
|
+
import { getMultiLingual } from "../utils/context.js";
|
|
11
|
+
import { currentMulmoScriptVersion } from "../utils/const.js";
|
|
8
12
|
import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
|
|
9
13
|
import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
|
|
10
14
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
11
15
|
const vanillaAgents = agents.default ?? agents;
|
|
12
|
-
const
|
|
16
|
+
const hashSHA256 = (text) => {
|
|
17
|
+
return createHash("sha256").update(text, "utf8").digest("hex");
|
|
18
|
+
};
|
|
19
|
+
// 1. translateGraph / map each beats.
|
|
20
|
+
// 2. beatGraph / map each target lang.
|
|
21
|
+
// 3. translateTextGraph / translate text.
|
|
22
|
+
export const translateTextGraph = {
|
|
13
23
|
version: 0.5,
|
|
14
24
|
nodes: {
|
|
25
|
+
localizedText: {
|
|
26
|
+
inputs: {
|
|
27
|
+
targetLang: ":targetLang", // for cache
|
|
28
|
+
beat: ":beat", // for cache
|
|
29
|
+
multiLingual: ":multiLingual", // for cache
|
|
30
|
+
lang: ":lang", // for cache
|
|
31
|
+
beatIndex: ":beatIndex", // for cache (state)
|
|
32
|
+
mulmoContext: ":context", // for cache (state)
|
|
33
|
+
system: translateSystemPrompt,
|
|
34
|
+
prompt: translatePrompts,
|
|
35
|
+
},
|
|
36
|
+
passThrough: {
|
|
37
|
+
lang: ":targetLang",
|
|
38
|
+
},
|
|
39
|
+
output: {
|
|
40
|
+
text: ".text",
|
|
41
|
+
},
|
|
42
|
+
// return { lang, text } <- localizedText
|
|
43
|
+
agent: "openAIAgent",
|
|
44
|
+
},
|
|
45
|
+
splitText: {
|
|
46
|
+
agent: (namedInputs) => {
|
|
47
|
+
const { localizedText, targetLang } = namedInputs;
|
|
48
|
+
// Cache
|
|
49
|
+
if (localizedText.texts) {
|
|
50
|
+
return localizedText.texts;
|
|
51
|
+
}
|
|
52
|
+
if (targetLang === "ja") {
|
|
53
|
+
return recursiveSplitJa(localizedText.text);
|
|
54
|
+
}
|
|
55
|
+
// not split
|
|
56
|
+
return [localizedText.text];
|
|
57
|
+
},
|
|
58
|
+
inputs: {
|
|
59
|
+
targetLang: ":targetLang",
|
|
60
|
+
localizedText: ":localizedText",
|
|
61
|
+
},
|
|
62
|
+
},
|
|
63
|
+
textTranslateResult: {
|
|
64
|
+
isResult: true,
|
|
65
|
+
agent: "copyAgent",
|
|
66
|
+
inputs: {
|
|
67
|
+
lang: ":targetLang",
|
|
68
|
+
text: ":localizedText.text",
|
|
69
|
+
texts: ":splitText",
|
|
70
|
+
ttsTexts: ":splitText",
|
|
71
|
+
cacheKey: ":multiLingual.cacheKey",
|
|
72
|
+
},
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
};
|
|
76
|
+
const beatGraph = {
|
|
77
|
+
version: 0.5,
|
|
78
|
+
nodes: {
|
|
79
|
+
targetLangs: {},
|
|
15
80
|
context: {},
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
agent:
|
|
81
|
+
beat: {},
|
|
82
|
+
__mapIndex: {},
|
|
83
|
+
// for cache
|
|
84
|
+
multiLingual: {
|
|
85
|
+
agent: (namedInputs) => {
|
|
86
|
+
const { multiLinguals, beatIndex, text } = namedInputs;
|
|
87
|
+
const cacheKey = hashSHA256(text ?? "");
|
|
88
|
+
const multiLingual = multiLinguals?.[beatIndex];
|
|
89
|
+
if (!multiLingual) {
|
|
90
|
+
return { cacheKey, multiLingualTexts: {} };
|
|
91
|
+
}
|
|
92
|
+
return {
|
|
93
|
+
multiLingualTexts: Object.keys(multiLingual.multiLingualTexts).reduce((tmp, lang) => {
|
|
94
|
+
if (multiLingual.multiLingualTexts[lang].cacheKey === cacheKey) {
|
|
95
|
+
tmp[lang] = multiLingual.multiLingualTexts[lang];
|
|
96
|
+
}
|
|
97
|
+
return tmp;
|
|
98
|
+
}, {}),
|
|
99
|
+
cacheKey,
|
|
100
|
+
};
|
|
101
|
+
},
|
|
21
102
|
inputs: {
|
|
22
|
-
|
|
23
|
-
|
|
103
|
+
text: ":beat.text",
|
|
104
|
+
beatIndex: ":__mapIndex",
|
|
105
|
+
multiLinguals: ":context.multiLingual",
|
|
24
106
|
},
|
|
25
107
|
},
|
|
26
|
-
|
|
27
|
-
|
|
108
|
+
preprocessMultiLingual: {
|
|
109
|
+
agent: "mapAgent",
|
|
110
|
+
inputs: {
|
|
111
|
+
beat: ":beat",
|
|
112
|
+
multiLingual: ":multiLingual",
|
|
113
|
+
rows: ":targetLangs",
|
|
114
|
+
lang: ":context.studio.script.lang",
|
|
115
|
+
context: ":context",
|
|
116
|
+
beatIndex: ":__mapIndex",
|
|
117
|
+
},
|
|
118
|
+
params: {
|
|
119
|
+
compositeResult: true,
|
|
120
|
+
rowKey: "targetLang",
|
|
121
|
+
},
|
|
122
|
+
graph: translateTextGraph,
|
|
123
|
+
},
|
|
124
|
+
mergeLocalizedText: {
|
|
125
|
+
// console: { after: true},
|
|
126
|
+
agent: "arrayToObjectAgent",
|
|
127
|
+
inputs: {
|
|
128
|
+
items: ":preprocessMultiLingual.textTranslateResult",
|
|
129
|
+
},
|
|
130
|
+
params: {
|
|
131
|
+
key: "lang",
|
|
132
|
+
},
|
|
133
|
+
},
|
|
134
|
+
multiLingualTexts: {
|
|
135
|
+
agent: "mergeObjectAgent",
|
|
136
|
+
inputs: {
|
|
137
|
+
items: [":multiLingual.multiLingualTexts", ":mergeLocalizedText"],
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
mergeMultiLingualData: {
|
|
28
141
|
isResult: true,
|
|
142
|
+
// console: { after: true},
|
|
29
143
|
agent: "mergeObjectAgent",
|
|
30
144
|
inputs: {
|
|
31
|
-
items: [{
|
|
145
|
+
items: [":multiLingual", { multiLingualTexts: ":multiLingualTexts" }],
|
|
146
|
+
},
|
|
147
|
+
},
|
|
148
|
+
},
|
|
149
|
+
};
|
|
150
|
+
const translateGraph = {
|
|
151
|
+
version: 0.5,
|
|
152
|
+
nodes: {
|
|
153
|
+
context: {},
|
|
154
|
+
outDirPath: {},
|
|
155
|
+
outputMultilingualFilePath: {},
|
|
156
|
+
targetLangs: {},
|
|
157
|
+
mergeStudioResult: {
|
|
158
|
+
isResult: true,
|
|
159
|
+
agent: "copyAgent",
|
|
160
|
+
inputs: {
|
|
161
|
+
version: "1.1",
|
|
162
|
+
multiLingual: ":beatsMap.mergeMultiLingualData",
|
|
32
163
|
},
|
|
33
164
|
},
|
|
34
165
|
beatsMap: {
|
|
@@ -37,139 +168,18 @@ const translateGraph = {
|
|
|
37
168
|
targetLangs: ":targetLangs",
|
|
38
169
|
context: ":context",
|
|
39
170
|
rows: ":context.studio.script.beats",
|
|
40
|
-
lang: ":lang",
|
|
41
171
|
},
|
|
42
172
|
params: {
|
|
43
173
|
rowKey: "beat",
|
|
44
174
|
compositeResult: true,
|
|
45
175
|
},
|
|
46
|
-
graph:
|
|
47
|
-
version: 0.5,
|
|
48
|
-
nodes: {
|
|
49
|
-
// for cache
|
|
50
|
-
multiLingual: {
|
|
51
|
-
agent: (namedInputs) => {
|
|
52
|
-
return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
|
|
53
|
-
},
|
|
54
|
-
inputs: {
|
|
55
|
-
index: ":__mapIndex",
|
|
56
|
-
rows: ":context.multiLingual",
|
|
57
|
-
},
|
|
58
|
-
},
|
|
59
|
-
preprocessMultiLingual: {
|
|
60
|
-
agent: "mapAgent",
|
|
61
|
-
inputs: {
|
|
62
|
-
beat: ":beat",
|
|
63
|
-
multiLingual: ":multiLingual",
|
|
64
|
-
rows: ":targetLangs",
|
|
65
|
-
lang: ":lang.text",
|
|
66
|
-
context: ":context",
|
|
67
|
-
beatIndex: ":__mapIndex",
|
|
68
|
-
},
|
|
69
|
-
params: {
|
|
70
|
-
compositeResult: true,
|
|
71
|
-
rowKey: "targetLang",
|
|
72
|
-
},
|
|
73
|
-
graph: {
|
|
74
|
-
version: 0.5,
|
|
75
|
-
nodes: {
|
|
76
|
-
localizedTexts: {
|
|
77
|
-
inputs: {
|
|
78
|
-
targetLang: ":targetLang", // for cache
|
|
79
|
-
beat: ":beat", // for cache
|
|
80
|
-
multiLingual: ":multiLingual", // for cache
|
|
81
|
-
lang: ":lang", // for cache
|
|
82
|
-
beatIndex: ":beatIndex", // for cache
|
|
83
|
-
mulmoContext: ":context", // for cache
|
|
84
|
-
system: translateSystemPrompt,
|
|
85
|
-
prompt: translatePrompts,
|
|
86
|
-
},
|
|
87
|
-
passThrough: {
|
|
88
|
-
lang: ":targetLang",
|
|
89
|
-
},
|
|
90
|
-
output: {
|
|
91
|
-
text: ".text",
|
|
92
|
-
},
|
|
93
|
-
// return { lang, text } <- localizedText
|
|
94
|
-
agent: "openAIAgent",
|
|
95
|
-
},
|
|
96
|
-
splitText: {
|
|
97
|
-
agent: (namedInputs) => {
|
|
98
|
-
const { localizedText, targetLang } = namedInputs;
|
|
99
|
-
// Cache
|
|
100
|
-
if (localizedText.texts) {
|
|
101
|
-
return localizedText;
|
|
102
|
-
}
|
|
103
|
-
if (targetLang === "ja") {
|
|
104
|
-
return {
|
|
105
|
-
...localizedText,
|
|
106
|
-
texts: recursiveSplitJa(localizedText.text),
|
|
107
|
-
};
|
|
108
|
-
}
|
|
109
|
-
// not split
|
|
110
|
-
return {
|
|
111
|
-
...localizedText,
|
|
112
|
-
texts: [localizedText.text],
|
|
113
|
-
};
|
|
114
|
-
// return { lang, text, texts }
|
|
115
|
-
},
|
|
116
|
-
inputs: {
|
|
117
|
-
targetLang: ":targetLang",
|
|
118
|
-
localizedText: ":localizedTexts",
|
|
119
|
-
},
|
|
120
|
-
},
|
|
121
|
-
ttsTexts: {
|
|
122
|
-
agent: (namedInputs) => {
|
|
123
|
-
const { localizedText, targetLang } = namedInputs;
|
|
124
|
-
// cache
|
|
125
|
-
if (localizedText.ttsTexts) {
|
|
126
|
-
return localizedText;
|
|
127
|
-
}
|
|
128
|
-
if (targetLang === "ja") {
|
|
129
|
-
return {
|
|
130
|
-
...localizedText,
|
|
131
|
-
ttsTexts: localizedText?.texts?.map((text) => replacePairsJa(text, replacementsJa)),
|
|
132
|
-
};
|
|
133
|
-
}
|
|
134
|
-
return {
|
|
135
|
-
...localizedText,
|
|
136
|
-
ttsTexts: localizedText.texts,
|
|
137
|
-
};
|
|
138
|
-
},
|
|
139
|
-
inputs: {
|
|
140
|
-
targetLang: ":targetLang",
|
|
141
|
-
localizedText: ":splitText",
|
|
142
|
-
},
|
|
143
|
-
isResult: true,
|
|
144
|
-
},
|
|
145
|
-
},
|
|
146
|
-
},
|
|
147
|
-
},
|
|
148
|
-
mergeLocalizedText: {
|
|
149
|
-
agent: "arrayToObjectAgent",
|
|
150
|
-
inputs: {
|
|
151
|
-
items: ":preprocessMultiLingual.ttsTexts",
|
|
152
|
-
},
|
|
153
|
-
params: {
|
|
154
|
-
key: "lang",
|
|
155
|
-
},
|
|
156
|
-
},
|
|
157
|
-
mergeMultiLingualData: {
|
|
158
|
-
isResult: true,
|
|
159
|
-
agent: "mergeObjectAgent",
|
|
160
|
-
inputs: {
|
|
161
|
-
items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
|
|
162
|
-
},
|
|
163
|
-
},
|
|
164
|
-
},
|
|
165
|
-
},
|
|
176
|
+
graph: beatGraph,
|
|
166
177
|
},
|
|
167
178
|
writeOutput: {
|
|
168
|
-
// console: { before: true },
|
|
169
179
|
agent: "fileWriteAgent",
|
|
170
180
|
inputs: {
|
|
171
181
|
file: ":outputMultilingualFilePath",
|
|
172
|
-
text: ":mergeStudioResult.
|
|
182
|
+
text: ":mergeStudioResult.toJSON()",
|
|
173
183
|
},
|
|
174
184
|
},
|
|
175
185
|
},
|
|
@@ -180,18 +190,14 @@ const localizedTextCacheAgentFilter = async (context, next) => {
|
|
|
180
190
|
if (!beat.text) {
|
|
181
191
|
return { text: "" };
|
|
182
192
|
}
|
|
183
|
-
// The original text is unchanged and the target language text is present
|
|
184
|
-
if (multiLingual.multiLingualTexts &&
|
|
185
|
-
multiLingual.multiLingualTexts[lang] &&
|
|
186
|
-
multiLingual.multiLingualTexts[lang].text === beat.text &&
|
|
187
|
-
multiLingual.multiLingualTexts[targetLang] &&
|
|
188
|
-
multiLingual.multiLingualTexts[targetLang].text) {
|
|
189
|
-
return { text: multiLingual.multiLingualTexts[targetLang].text };
|
|
190
|
-
}
|
|
191
193
|
// same language
|
|
192
194
|
if (targetLang === lang) {
|
|
193
195
|
return { text: beat.text };
|
|
194
196
|
}
|
|
197
|
+
// The original text is unchanged and the target language text is present
|
|
198
|
+
if (multiLingual.cacheKey === multiLingual.multiLingualTexts[targetLang]?.cacheKey) {
|
|
199
|
+
return { text: multiLingual.multiLingualTexts[targetLang].text };
|
|
200
|
+
}
|
|
195
201
|
try {
|
|
196
202
|
MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
|
|
197
203
|
return await next(context);
|
|
@@ -204,11 +210,49 @@ const agentFilters = [
|
|
|
204
210
|
{
|
|
205
211
|
name: "localizedTextCacheAgentFilter",
|
|
206
212
|
agent: localizedTextCacheAgentFilter,
|
|
207
|
-
nodeIds: ["
|
|
213
|
+
nodeIds: ["localizedText"],
|
|
208
214
|
},
|
|
209
215
|
];
|
|
210
|
-
const
|
|
211
|
-
const
|
|
216
|
+
export const translateBeat = async (index, context, targetLangs, args) => {
|
|
217
|
+
const { settings, callbacks } = args ?? {};
|
|
218
|
+
// Validate inputs
|
|
219
|
+
if (index < 0 || index >= context.studio.script.beats.length) {
|
|
220
|
+
throw new Error(`Invalid beat index: ${index}. Must be between 0 and ${context.studio.script.beats.length - 1}`);
|
|
221
|
+
}
|
|
222
|
+
if (!targetLangs || targetLangs.length === 0) {
|
|
223
|
+
throw new Error("targetLangs must be a non-empty array");
|
|
224
|
+
}
|
|
225
|
+
try {
|
|
226
|
+
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
227
|
+
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
228
|
+
const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
|
|
229
|
+
mkdir(outDirPath);
|
|
230
|
+
const config = settings2GraphAIConfig(settings, process.env);
|
|
231
|
+
assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
|
|
232
|
+
const graph = new GraphAI(beatGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
|
|
233
|
+
graph.injectValue("context", context);
|
|
234
|
+
graph.injectValue("targetLangs", targetLangs);
|
|
235
|
+
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
236
|
+
graph.injectValue("__mapIndex", index);
|
|
237
|
+
if (callbacks) {
|
|
238
|
+
callbacks.forEach((callback) => {
|
|
239
|
+
graph.registerCallback(callback);
|
|
240
|
+
});
|
|
241
|
+
}
|
|
242
|
+
const results = await graph.run();
|
|
243
|
+
const multiLingual = getMultiLingual(outputMultilingualFilePath, context.studio.beats.length);
|
|
244
|
+
multiLingual[index] = results.mergeMultiLingualData;
|
|
245
|
+
const data = {
|
|
246
|
+
version: currentMulmoScriptVersion,
|
|
247
|
+
multiLingual,
|
|
248
|
+
};
|
|
249
|
+
fs.writeFileSync(outputMultilingualFilePath, JSON.stringify(data, null, 2), "utf8");
|
|
250
|
+
writingMessage(outputMultilingualFilePath);
|
|
251
|
+
}
|
|
252
|
+
catch (error) {
|
|
253
|
+
GraphAILogger.log(error);
|
|
254
|
+
}
|
|
255
|
+
};
|
|
212
256
|
export const translate = async (context, args) => {
|
|
213
257
|
const { settings, callbacks } = args ?? {};
|
|
214
258
|
try {
|
|
@@ -217,11 +261,11 @@ export const translate = async (context, args) => {
|
|
|
217
261
|
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
218
262
|
const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
|
|
219
263
|
mkdir(outDirPath);
|
|
264
|
+
const targetLangs = [...new Set([context.lang, context.studio.script.captionParams?.lang].filter((x) => !isNull(x)))];
|
|
220
265
|
const config = settings2GraphAIConfig(settings, process.env);
|
|
221
266
|
assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
|
|
222
267
|
const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
|
|
223
268
|
graph.injectValue("context", context);
|
|
224
|
-
graph.injectValue("defaultLang", defaultLang);
|
|
225
269
|
graph.injectValue("targetLangs", targetLangs);
|
|
226
270
|
graph.injectValue("outDirPath", outDirPath);
|
|
227
271
|
graph.injectValue("outputMultilingualFilePath", outputMultilingualFilePath);
|
|
@@ -239,4 +283,5 @@ export const translate = async (context, args) => {
|
|
|
239
283
|
finally {
|
|
240
284
|
MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
|
|
241
285
|
}
|
|
286
|
+
return context;
|
|
242
287
|
};
|
|
@@ -94,7 +94,7 @@ const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations,
|
|
|
94
94
|
if (voiceStartAt) {
|
|
95
95
|
const remainingDuration = movieDuration - voiceStartAt;
|
|
96
96
|
const duration = remaining - remainingDuration;
|
|
97
|
-
userAssert(duration >= 0, `Invalid startAt: At index(${idx}),
|
|
97
|
+
userAssert(duration >= 0, `Invalid startAt: At index(${idx}), available duration(${duration}) < 0`);
|
|
98
98
|
beatDurations.push(duration);
|
|
99
99
|
subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
|
|
100
100
|
userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
|
|
@@ -3,7 +3,7 @@ import { GraphAILogger } from "graphai";
|
|
|
3
3
|
import Replicate from "replicate";
|
|
4
4
|
import { provider2LipSyncAgent } from "../utils/provider2agent.js";
|
|
5
5
|
export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
6
|
-
const { movieFile, audioFile } = namedInputs;
|
|
6
|
+
const { movieFile, audioFile, imageFile } = namedInputs;
|
|
7
7
|
const apiKey = config?.apiKey;
|
|
8
8
|
const model = params.model ?? provider2LipSyncAgent.replicate.defaultModel;
|
|
9
9
|
if (!apiKey) {
|
|
@@ -12,10 +12,12 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
|
|
|
12
12
|
const replicate = new Replicate({
|
|
13
13
|
auth: apiKey,
|
|
14
14
|
});
|
|
15
|
-
const videoBuffer = readFileSync(movieFile);
|
|
15
|
+
const videoBuffer = movieFile ? readFileSync(movieFile) : undefined;
|
|
16
16
|
const audioBuffer = readFileSync(audioFile);
|
|
17
|
-
const
|
|
17
|
+
const imageBuffer = imageFile ? readFileSync(imageFile) : undefined;
|
|
18
|
+
const videoUri = videoBuffer ? `data:video/quicktime;base64,${videoBuffer.toString("base64")}` : undefined;
|
|
18
19
|
const audioUri = `data:audio/wav;base64,${audioBuffer.toString("base64")}`;
|
|
20
|
+
const imageUri = imageBuffer ? `data:image/png;base64,${imageBuffer.toString("base64")}` : undefined;
|
|
19
21
|
const input = {
|
|
20
22
|
video: undefined,
|
|
21
23
|
video_input: undefined,
|
|
@@ -23,6 +25,7 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
|
|
|
23
25
|
audio: undefined,
|
|
24
26
|
audio_input: undefined,
|
|
25
27
|
audio_file: undefined,
|
|
28
|
+
image: undefined,
|
|
26
29
|
};
|
|
27
30
|
const modelParams = provider2LipSyncAgent.replicate.modelParams[model];
|
|
28
31
|
if (!modelParams) {
|
|
@@ -30,12 +33,16 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
|
|
|
30
33
|
}
|
|
31
34
|
const videoParam = modelParams.video;
|
|
32
35
|
const audioParam = modelParams.audio;
|
|
36
|
+
const imageParam = modelParams.image;
|
|
33
37
|
if (videoParam === "video" || videoParam === "video_input" || videoParam === "video_url") {
|
|
34
38
|
input[videoParam] = videoUri;
|
|
35
39
|
}
|
|
36
40
|
if (audioParam === "audio" || audioParam === "audio_input" || audioParam === "audio_file") {
|
|
37
41
|
input[audioParam] = audioUri;
|
|
38
42
|
}
|
|
43
|
+
if (imageParam === "image") {
|
|
44
|
+
input[imageParam] = imageUri;
|
|
45
|
+
}
|
|
39
46
|
const model_identifier = provider2LipSyncAgent.replicate.modelParams[model]?.identifier ?? model;
|
|
40
47
|
try {
|
|
41
48
|
const output = await replicate.run(model_identifier, {
|
|
@@ -27,7 +27,7 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
|
|
|
27
27
|
try {
|
|
28
28
|
const voiceRes = await fetch(url, options);
|
|
29
29
|
const voiceJson = await voiceRes.json();
|
|
30
|
-
if (voiceJson
|
|
30
|
+
if (voiceJson?.generatedVoice?.audioFileDownloadUrl) {
|
|
31
31
|
const audioRes = await fetch(voiceJson.generatedVoice.audioFileDownloadUrl);
|
|
32
32
|
const buffer = Buffer.from(await audioRes.arrayBuffer());
|
|
33
33
|
return { buffer };
|