mulmocast 0.1.6 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/akira_comic.json +1 -1
- package/assets/templates/ani.json +48 -0
- package/assets/templates/ani_ja.json +44 -0
- package/assets/templates/characters.json +1 -1
- package/assets/templates/children_book.json +1 -1
- package/assets/templates/comic_strips.json +1 -1
- package/assets/templates/drslump_comic.json +1 -1
- package/assets/templates/ghibli_comic.json +1 -1
- package/assets/templates/ghibli_image_only.json +1 -1
- package/assets/templates/ghibli_shorts.json +2 -3
- package/assets/templates/ghost_comic.json +1 -1
- package/assets/templates/onepiece_comic.json +1 -1
- package/assets/templates/portrait_movie.json +1 -1
- package/assets/templates/realistic_movie.json +1 -1
- package/assets/templates/sensei_and_taro.json +4 -5
- package/assets/templates/shorts.json +1 -1
- package/assets/templates/trailer.json +1 -1
- package/lib/actions/audio.js +8 -7
- package/lib/actions/image_agents.d.ts +53 -98
- package/lib/actions/image_agents.js +14 -6
- package/lib/actions/images.js +42 -13
- package/lib/actions/movie.js +1 -1
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/movie_replicate_agent.js +18 -5
- package/lib/agents/sound_effect_replicate_agent.d.ts +5 -0
- package/lib/agents/sound_effect_replicate_agent.js +59 -0
- package/lib/cli/commands/tool/scripting/builder.js +1 -1
- package/lib/cli/commands/tool/scripting/handler.d.ts +1 -1
- package/lib/cli/commands/tool/story_to_script/builder.js +1 -1
- package/lib/cli/commands/tool/story_to_script/handler.d.ts +1 -1
- package/lib/mcp/server.js +2 -2
- package/lib/methods/index.d.ts +1 -0
- package/lib/methods/index.js +1 -0
- package/lib/methods/mulmo_presentation_style.d.ts +25 -6
- package/lib/methods/mulmo_presentation_style.js +33 -30
- package/lib/methods/mulmo_script.d.ts +4 -0
- package/lib/methods/mulmo_script.js +31 -0
- package/lib/tools/story_to_script.d.ts +1 -1
- package/lib/types/agent.d.ts +9 -0
- package/lib/types/schema.d.ts +727 -554
- package/lib/types/schema.js +41 -24
- package/lib/types/type.d.ts +4 -4
- package/lib/utils/assets.d.ts +18 -0
- package/lib/utils/assets.js +101 -0
- package/lib/utils/context.d.ts +98 -84
- package/lib/utils/context.js +2 -1
- package/lib/utils/ffmpeg_utils.js +6 -0
- package/lib/utils/file.d.ts +4 -1
- package/lib/utils/file.js +3 -5
- package/lib/utils/preprocess.d.ts +57 -47
- package/lib/utils/preprocess.js +7 -5
- package/lib/utils/provider2agent.d.ts +27 -7
- package/lib/utils/provider2agent.js +85 -7
- package/lib/utils/utils.d.ts +1 -2
- package/lib/utils/utils.js +4 -2
- package/package.json +7 -7
- package/scripts/templates/presentation.json~ +0 -119
package/lib/actions/images.js
CHANGED
|
@@ -6,7 +6,7 @@ import * as vanilla from "@graphai/vanilla";
|
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
|
-
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
9
|
+
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent } from "../agents/index.js";
|
|
10
10
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
11
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
12
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
@@ -23,10 +23,14 @@ const movieAgents = {
|
|
|
23
23
|
movieGoogleAgent,
|
|
24
24
|
movieReplicateAgent,
|
|
25
25
|
};
|
|
26
|
+
const soundEffectAgents = {
|
|
27
|
+
soundEffectReplicateAgent,
|
|
28
|
+
};
|
|
26
29
|
const defaultAgents = {
|
|
27
30
|
...vanillaAgents,
|
|
28
31
|
...imageAgents,
|
|
29
32
|
...movieAgents,
|
|
33
|
+
...soundEffectAgents,
|
|
30
34
|
mediaMockAgent,
|
|
31
35
|
fileWriteAgent,
|
|
32
36
|
openAIAgent,
|
|
@@ -39,7 +43,6 @@ const beat_graph_data = {
|
|
|
39
43
|
nodes: {
|
|
40
44
|
context: {},
|
|
41
45
|
htmlImageAgentInfo: {},
|
|
42
|
-
movieAgentInfo: {},
|
|
43
46
|
imageRefs: {},
|
|
44
47
|
beat: {},
|
|
45
48
|
__mapIndex: {},
|
|
@@ -134,7 +137,7 @@ const beat_graph_data = {
|
|
|
134
137
|
},
|
|
135
138
|
movieGenerator: {
|
|
136
139
|
if: ":preprocessor.movieFile",
|
|
137
|
-
agent: ":movieAgentInfo.agent",
|
|
140
|
+
agent: ":preprocessor.movieAgentInfo.agent",
|
|
138
141
|
inputs: {
|
|
139
142
|
onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
|
|
140
143
|
prompt: ":beat.moviePrompt",
|
|
@@ -147,7 +150,7 @@ const beat_graph_data = {
|
|
|
147
150
|
mulmoContext: ":context",
|
|
148
151
|
},
|
|
149
152
|
params: {
|
|
150
|
-
model: ":preprocessor.movieParams.model",
|
|
153
|
+
model: ":preprocessor.movieAgentInfo.movieParams.model",
|
|
151
154
|
duration: ":beat.duration",
|
|
152
155
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
153
156
|
},
|
|
@@ -167,28 +170,59 @@ const beat_graph_data = {
|
|
|
167
170
|
defaultValue: {},
|
|
168
171
|
},
|
|
169
172
|
audioChecker: {
|
|
170
|
-
if: ":preprocessor.movieFile",
|
|
171
173
|
agent: async (namedInputs) => {
|
|
172
|
-
|
|
174
|
+
if (namedInputs.soundEffectFile) {
|
|
175
|
+
return { hasMovieAudio: true };
|
|
176
|
+
}
|
|
177
|
+
const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
|
|
178
|
+
if (!sourceFile) {
|
|
179
|
+
return { hasMovieAudio: false };
|
|
180
|
+
}
|
|
181
|
+
const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
|
|
173
182
|
return { hasMovieAudio: hasAudio };
|
|
174
183
|
},
|
|
184
|
+
inputs: {
|
|
185
|
+
onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
|
|
186
|
+
movieFile: ":preprocessor.movieFile",
|
|
187
|
+
imageFile: ":preprocessor.imagePath",
|
|
188
|
+
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
189
|
+
},
|
|
190
|
+
},
|
|
191
|
+
soundEffectGenerator: {
|
|
192
|
+
if: ":preprocessor.soundEffectPrompt",
|
|
193
|
+
agent: ":preprocessor.soundEffectAgentInfo.agentName",
|
|
175
194
|
inputs: {
|
|
176
195
|
onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
|
|
196
|
+
prompt: ":preprocessor.soundEffectPrompt",
|
|
177
197
|
movieFile: ":preprocessor.movieFile",
|
|
198
|
+
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
199
|
+
params: {
|
|
200
|
+
model: ":preprocessor.soundEffectModel",
|
|
201
|
+
duration: ":beat.duration",
|
|
202
|
+
},
|
|
203
|
+
cache: {
|
|
204
|
+
force: [":context.force"],
|
|
205
|
+
file: ":preprocessor.soundEffectFile",
|
|
206
|
+
index: ":__mapIndex",
|
|
207
|
+
sessionType: "soundEffect",
|
|
208
|
+
mulmoContext: ":context",
|
|
209
|
+
},
|
|
178
210
|
},
|
|
179
211
|
defaultValue: {},
|
|
180
212
|
},
|
|
181
213
|
output: {
|
|
182
214
|
agent: "copyAgent",
|
|
183
215
|
inputs: {
|
|
184
|
-
onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
|
|
216
|
+
onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator"], // to wait for imageFromMovie to finish
|
|
185
217
|
imageFile: ":preprocessor.imagePath",
|
|
186
218
|
movieFile: ":preprocessor.movieFile",
|
|
219
|
+
soundEffectFile: ":preprocessor.soundEffectFile",
|
|
187
220
|
hasMovieAudio: ":audioChecker.hasMovieAudio",
|
|
188
221
|
},
|
|
189
222
|
output: {
|
|
190
223
|
imageFile: ".imageFile",
|
|
191
224
|
movieFile: ".movieFile",
|
|
225
|
+
soundEffectFile: ".soundEffectFile",
|
|
192
226
|
hasMovieAudio: ".hasMovieAudio",
|
|
193
227
|
},
|
|
194
228
|
isResult: true,
|
|
@@ -201,7 +235,6 @@ const graph_data = {
|
|
|
201
235
|
nodes: {
|
|
202
236
|
context: {},
|
|
203
237
|
htmlImageAgentInfo: {},
|
|
204
|
-
movieAgentInfo: {},
|
|
205
238
|
outputStudioFilePath: {},
|
|
206
239
|
imageRefs: {},
|
|
207
240
|
map: {
|
|
@@ -210,7 +243,6 @@ const graph_data = {
|
|
|
210
243
|
rows: ":context.studio.script.beats",
|
|
211
244
|
context: ":context",
|
|
212
245
|
htmlImageAgentInfo: ":htmlImageAgentInfo",
|
|
213
|
-
movieAgentInfo: ":movieAgentInfo",
|
|
214
246
|
imageRefs: ":imageRefs",
|
|
215
247
|
},
|
|
216
248
|
isResult: true,
|
|
@@ -284,7 +316,7 @@ export const graphOption = async (context, settings) => {
|
|
|
284
316
|
{
|
|
285
317
|
name: "fileCacheAgentFilter",
|
|
286
318
|
agent: fileCacheAgentFilter,
|
|
287
|
-
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
|
|
319
|
+
nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator"],
|
|
288
320
|
},
|
|
289
321
|
],
|
|
290
322
|
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
@@ -314,9 +346,6 @@ const prepareGenerateImages = async (context) => {
|
|
|
314
346
|
const injections = {
|
|
315
347
|
context,
|
|
316
348
|
htmlImageAgentInfo,
|
|
317
|
-
movieAgentInfo: {
|
|
318
|
-
agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
|
|
319
|
-
},
|
|
320
349
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
|
|
321
350
|
imageRefs,
|
|
322
351
|
};
|
package/lib/actions/movie.js
CHANGED
|
@@ -162,7 +162,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
162
162
|
beatTimestamps.push(timestamp);
|
|
163
163
|
return timestamp; // Skip voice-over beats.
|
|
164
164
|
}
|
|
165
|
-
const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
|
|
165
|
+
const sourceFile = studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
|
|
166
166
|
assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
|
|
167
167
|
assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
|
|
168
168
|
const extraPadding = (() => {
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -10,8 +10,9 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
|
10
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
12
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
|
+
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
13
14
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
14
15
|
import { textInputAgent } from "@graphai/input_agents";
|
|
15
16
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
16
17
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
17
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
18
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -10,9 +10,10 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
|
10
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
12
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
|
+
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
13
14
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
14
15
|
import { textInputAgent } from "@graphai/input_agents";
|
|
15
16
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
16
17
|
// import * as vanilla from "@graphai/vanilla";
|
|
17
18
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
18
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
|
|
19
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFileSync } from "fs";
|
|
2
2
|
import { GraphAILogger } from "graphai";
|
|
3
3
|
import Replicate from "replicate";
|
|
4
|
+
import { provider2MovieAgent } from "../utils/provider2agent.js";
|
|
4
5
|
async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
|
|
5
6
|
const replicate = new Replicate({
|
|
6
7
|
auth: apiKey,
|
|
@@ -10,6 +11,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
|
|
|
10
11
|
duration,
|
|
11
12
|
image: undefined,
|
|
12
13
|
start_image: undefined,
|
|
14
|
+
first_frame_image: undefined,
|
|
13
15
|
aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
|
|
14
16
|
// resolution: "720p", // only for bytedance/seedance-1-lite
|
|
15
17
|
// fps: 24, // only for bytedance/seedance-1-lite
|
|
@@ -21,15 +23,19 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
|
|
|
21
23
|
if (imagePath) {
|
|
22
24
|
const buffer = readFileSync(imagePath);
|
|
23
25
|
const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
|
|
24
|
-
|
|
25
|
-
|
|
26
|
+
const start_image = provider2MovieAgent.replicate.modelParams[model]?.start_image;
|
|
27
|
+
if (start_image === "first_frame_image" || start_image === "image" || start_image === "start_image") {
|
|
28
|
+
input[start_image] = base64Image;
|
|
29
|
+
}
|
|
30
|
+
else if (start_image === undefined) {
|
|
31
|
+
throw new Error(`Model ${model} does not support image-to-video generation`);
|
|
26
32
|
}
|
|
27
33
|
else {
|
|
28
34
|
input.image = base64Image;
|
|
29
35
|
}
|
|
30
36
|
}
|
|
31
37
|
try {
|
|
32
|
-
const output = await replicate.run(model
|
|
38
|
+
const output = await replicate.run(model, { input });
|
|
33
39
|
// Download the generated video
|
|
34
40
|
if (output && typeof output === "object" && "url" in output) {
|
|
35
41
|
const videoUrl = output.url();
|
|
@@ -61,13 +67,20 @@ export const getAspectRatio = (canvasSize) => {
|
|
|
61
67
|
export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
|
|
62
68
|
const { prompt, imagePath } = namedInputs;
|
|
63
69
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
64
|
-
const
|
|
70
|
+
const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
|
|
71
|
+
if (!provider2MovieAgent.replicate.modelParams[model]) {
|
|
72
|
+
throw new Error(`Model ${model} is not supported`);
|
|
73
|
+
}
|
|
74
|
+
const duration = params.duration ?? provider2MovieAgent.replicate.modelParams[model].durations[0] ?? 5;
|
|
75
|
+
if (!provider2MovieAgent.replicate.modelParams[model].durations.includes(duration)) {
|
|
76
|
+
throw new Error(`Duration ${duration} is not supported for model ${model}. Supported durations: ${provider2MovieAgent.replicate.modelParams[model].durations.join(", ")}`);
|
|
77
|
+
}
|
|
65
78
|
const apiKey = config?.apiKey;
|
|
66
79
|
if (!apiKey) {
|
|
67
80
|
throw new Error("REPLICATE_API_TOKEN environment variable is required");
|
|
68
81
|
}
|
|
69
82
|
try {
|
|
70
|
-
const buffer = await generateMovie(
|
|
83
|
+
const buffer = await generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration);
|
|
71
84
|
if (buffer) {
|
|
72
85
|
return { buffer };
|
|
73
86
|
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, SoundEffectAgentInputs, ReplicateSoundEffectAgentParams, ReplicateSoundEffectAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const soundEffectReplicateAgent: AgentFunction<ReplicateSoundEffectAgentParams, AgentBufferResult, SoundEffectAgentInputs, ReplicateSoundEffectAgentConfig>;
|
|
4
|
+
declare const soundEffectReplicateAgentInfo: AgentFunctionInfo;
|
|
5
|
+
export default soundEffectReplicateAgentInfo;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { GraphAILogger } from "graphai";
|
|
3
|
+
import Replicate from "replicate";
|
|
4
|
+
import { provider2SoundEffectAgent } from "../utils/provider2agent.js";
|
|
5
|
+
export const soundEffectReplicateAgent = async ({ namedInputs, params, config }) => {
|
|
6
|
+
const { prompt, movieFile } = namedInputs;
|
|
7
|
+
const apiKey = config?.apiKey;
|
|
8
|
+
const model = params.model ?? provider2SoundEffectAgent.replicate.defaultModel;
|
|
9
|
+
if (!apiKey) {
|
|
10
|
+
throw new Error("REPLICATE_API_TOKEN environment variable is required");
|
|
11
|
+
}
|
|
12
|
+
const replicate = new Replicate({
|
|
13
|
+
auth: apiKey,
|
|
14
|
+
});
|
|
15
|
+
const buffer = readFileSync(movieFile);
|
|
16
|
+
const uri = `data:video/quicktime;base64,${buffer.toString("base64")}`;
|
|
17
|
+
const input = {
|
|
18
|
+
video: uri,
|
|
19
|
+
prompt,
|
|
20
|
+
duration: params.duration,
|
|
21
|
+
// seed: -1,
|
|
22
|
+
// num_steps: 25,
|
|
23
|
+
// cfg_strength: 4.5,
|
|
24
|
+
// negative_prompt: "music"
|
|
25
|
+
};
|
|
26
|
+
try {
|
|
27
|
+
const model_identifier = provider2SoundEffectAgent.replicate.modelParams[model]?.identifier ?? model;
|
|
28
|
+
const output = await replicate.run(model_identifier, {
|
|
29
|
+
input,
|
|
30
|
+
});
|
|
31
|
+
if (output && typeof output === "object" && "url" in output) {
|
|
32
|
+
const videoUrl = output.url();
|
|
33
|
+
const videoResponse = await fetch(videoUrl);
|
|
34
|
+
if (!videoResponse.ok) {
|
|
35
|
+
throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
|
|
36
|
+
}
|
|
37
|
+
const arrayBuffer = await videoResponse.arrayBuffer();
|
|
38
|
+
return { buffer: Buffer.from(arrayBuffer) };
|
|
39
|
+
}
|
|
40
|
+
return undefined;
|
|
41
|
+
}
|
|
42
|
+
catch (error) {
|
|
43
|
+
GraphAILogger.info("Failed to generate sound effect:", error.message);
|
|
44
|
+
throw error;
|
|
45
|
+
}
|
|
46
|
+
};
|
|
47
|
+
const soundEffectReplicateAgentInfo = {
|
|
48
|
+
name: "soundEffectReplicateAgent",
|
|
49
|
+
agent: soundEffectReplicateAgent,
|
|
50
|
+
mock: soundEffectReplicateAgent,
|
|
51
|
+
samples: [],
|
|
52
|
+
description: "Replicate Sound Effect agent (movie to movie)",
|
|
53
|
+
category: ["movie"],
|
|
54
|
+
author: "Receptron Team",
|
|
55
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
56
|
+
license: "MIT",
|
|
57
|
+
environmentVariables: ["REPLICATE_API_TOKEN"],
|
|
58
|
+
};
|
|
59
|
+
export default soundEffectReplicateAgentInfo;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { llm } from "../../../../utils/
|
|
1
|
+
import { llm } from "../../../../utils/provider2agent.js";
|
|
2
2
|
import { getAvailableTemplates } from "../../../../utils/file.js";
|
|
3
3
|
const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
|
|
4
4
|
export const builder = (yargs) => {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { getAvailableTemplates } from "../../../../utils/file.js";
|
|
2
|
-
import { llm } from "../../../../utils/
|
|
2
|
+
import { llm } from "../../../../utils/provider2agent.js";
|
|
3
3
|
import { storyToScriptGenerateMode } from "../../../../utils/const.js";
|
|
4
4
|
const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
|
|
5
5
|
export const builder = (yargs) => {
|
package/lib/mcp/server.js
CHANGED
|
@@ -11,7 +11,7 @@ import { audio, images, movie, captions, pdf } from "../actions/index.js";
|
|
|
11
11
|
import { initializeContext, runTranslateIfNeeded } from "../cli/helpers.js";
|
|
12
12
|
import { outDirName } from "../utils/const.js";
|
|
13
13
|
import { resolveDirPath, mkdir, generateTimestampedFileName } from "../utils/file.js";
|
|
14
|
-
import {
|
|
14
|
+
import { MulmoScriptMethods } from "../methods/index.js";
|
|
15
15
|
const __filename = fileURLToPath(import.meta.url);
|
|
16
16
|
const __dirname = path.dirname(__filename);
|
|
17
17
|
// Load MulmoScript JSON Schema from file
|
|
@@ -83,7 +83,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
83
83
|
}
|
|
84
84
|
const { cmd, mulmoScript, options = {}, } = args;
|
|
85
85
|
// Validate MulmoScript schema
|
|
86
|
-
const validatedScript =
|
|
86
|
+
const validatedScript = MulmoScriptMethods.validate(mulmoScript);
|
|
87
87
|
// Save MulmoScript to output directory
|
|
88
88
|
const filePath = await saveMulmoScriptToOutput(validatedScript);
|
|
89
89
|
// Create argv-like object for CLI compatibility
|
package/lib/methods/index.d.ts
CHANGED
package/lib/methods/index.js
CHANGED
|
@@ -1,18 +1,37 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { MulmoCanvasDimension, MulmoBeat,
|
|
2
|
+
import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
|
|
3
3
|
export declare const MulmoPresentationStyleMethods: {
|
|
4
4
|
getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
|
|
5
|
-
getSpeechProvider(presentationStyle: MulmoPresentationStyle): Text2SpeechProvider;
|
|
6
5
|
getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
|
|
7
6
|
getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
|
|
8
|
-
|
|
7
|
+
getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
|
|
9
8
|
getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
|
|
10
|
-
getTTSProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
|
|
11
9
|
getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
|
|
12
|
-
getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
|
|
13
10
|
getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
|
|
14
11
|
getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
|
|
15
|
-
|
|
12
|
+
getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
|
|
13
|
+
agent: string;
|
|
14
|
+
movieParams: {
|
|
15
|
+
speed?: number | undefined;
|
|
16
|
+
provider?: string | undefined;
|
|
17
|
+
model?: string | undefined;
|
|
18
|
+
fillOption?: {
|
|
19
|
+
style: "aspectFit" | "aspectFill";
|
|
20
|
+
} | undefined;
|
|
21
|
+
transition?: {
|
|
22
|
+
type: "fade" | "slideout_left";
|
|
23
|
+
duration: number;
|
|
24
|
+
} | undefined;
|
|
25
|
+
};
|
|
26
|
+
};
|
|
27
|
+
getSoundEffectAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
|
|
28
|
+
agentName: string;
|
|
29
|
+
defaultModel: import("../utils/provider2agent.js").ReplicateModel;
|
|
30
|
+
models: import("../utils/provider2agent.js").ReplicateModel[];
|
|
31
|
+
modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
|
|
32
|
+
identifier?: `${string}/${string}:${string}`;
|
|
33
|
+
}>;
|
|
34
|
+
};
|
|
16
35
|
getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
|
|
17
36
|
getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
|
|
18
37
|
getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
+
import { isNull } from "graphai";
|
|
2
3
|
import { userAssert } from "../utils/utils.js";
|
|
3
|
-
import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
|
|
4
|
-
import {
|
|
4
|
+
import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
|
|
5
|
+
import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, defaultProviders, } from "../utils/provider2agent.js";
|
|
5
6
|
const defaultTextSlideStyles = [
|
|
6
7
|
'*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
|
|
7
8
|
"body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
|
|
@@ -20,14 +21,10 @@ export const MulmoPresentationStyleMethods = {
|
|
|
20
21
|
getCanvasSize(presentationStyle) {
|
|
21
22
|
return mulmoCanvasDimensionSchema.parse(presentationStyle.canvasSize);
|
|
22
23
|
},
|
|
23
|
-
getSpeechProvider(presentationStyle) {
|
|
24
|
-
return text2SpeechProviderSchema.parse(presentationStyle.speechParams?.provider);
|
|
25
|
-
},
|
|
26
24
|
getAllSpeechProviders(presentationStyle) {
|
|
27
25
|
const providers = new Set();
|
|
28
|
-
const defaultProvider = this.getSpeechProvider(presentationStyle);
|
|
29
26
|
Object.values(presentationStyle.speechParams.speakers).forEach((speaker) => {
|
|
30
|
-
const provider = speaker.provider
|
|
27
|
+
const provider = text2SpeechProviderSchema.parse(speaker.provider);
|
|
31
28
|
providers.add(provider);
|
|
32
29
|
});
|
|
33
30
|
return providers;
|
|
@@ -39,27 +36,27 @@ export const MulmoPresentationStyleMethods = {
|
|
|
39
36
|
// This code allows us to support both string and array of strings for cssStyles
|
|
40
37
|
return [...defaultTextSlideStyles, ...[styles], ...[extraStyles]].flat().join("\n");
|
|
41
38
|
},
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
getDefaultSpeaker(presentationStyle) {
|
|
40
|
+
const speakers = presentationStyle.speechParams.speakers ?? {};
|
|
41
|
+
const keys = Object.keys(speakers).sort();
|
|
42
|
+
userAssert(keys.length !== 0, "presentationStyle.speechParams.speakers is not set!!");
|
|
43
|
+
const defaultSpeaker = keys.find((key) => speakers[key].isDefault);
|
|
44
|
+
if (!isNull(defaultSpeaker)) {
|
|
45
|
+
return defaultSpeaker;
|
|
46
|
+
}
|
|
47
|
+
return keys[0];
|
|
44
48
|
},
|
|
45
49
|
getSpeaker(presentationStyle, beat) {
|
|
46
50
|
userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
51
|
+
const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
|
|
52
|
+
userAssert(!!speakerId, "beat.speaker and default speaker is not set");
|
|
53
|
+
const speaker = presentationStyle.speechParams.speakers[speakerId];
|
|
54
|
+
userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
|
|
50
55
|
return speaker;
|
|
51
56
|
},
|
|
52
|
-
getTTSProvider(presentationStyle, beat) {
|
|
53
|
-
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
54
|
-
return speaker.provider ?? presentationStyle.speechParams.provider;
|
|
55
|
-
},
|
|
56
57
|
getTTSModel(presentationStyle, beat) {
|
|
57
58
|
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
58
|
-
return speaker.model
|
|
59
|
-
},
|
|
60
|
-
getVoiceId(presentationStyle, beat) {
|
|
61
|
-
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
62
|
-
return speaker.voiceId;
|
|
59
|
+
return speaker.model;
|
|
63
60
|
},
|
|
64
61
|
getText2ImageProvider(provider) {
|
|
65
62
|
return text2ImageProviderSchema.parse(provider);
|
|
@@ -80,17 +77,23 @@ export const MulmoPresentationStyleMethods = {
|
|
|
80
77
|
imageParams: { ...defaultImageParams, ...imageParams },
|
|
81
78
|
};
|
|
82
79
|
},
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
const movieProvider = (
|
|
86
|
-
|
|
80
|
+
getMovieAgentInfo(presentationStyle, beat) {
|
|
81
|
+
const movieParams = { ...presentationStyle.movieParams, ...beat?.movieParams };
|
|
82
|
+
const movieProvider = text2MovieProviderSchema.parse(movieParams?.provider);
|
|
83
|
+
const agentInfo = provider2MovieAgent[movieProvider];
|
|
84
|
+
return {
|
|
85
|
+
agent: agentInfo.agentName,
|
|
86
|
+
movieParams,
|
|
87
|
+
};
|
|
88
|
+
},
|
|
89
|
+
getSoundEffectAgentInfo(presentationStyle, beat) {
|
|
90
|
+
const soundEffectProvider = (beat.soundEffectParams?.provider ??
|
|
91
|
+
presentationStyle.soundEffectParams?.provider ??
|
|
92
|
+
defaultProviders.soundEffect);
|
|
93
|
+
const agentInfo = provider2SoundEffectAgent[soundEffectProvider];
|
|
94
|
+
return agentInfo;
|
|
87
95
|
},
|
|
88
96
|
getConcurrency(presentationStyle) {
|
|
89
|
-
/*
|
|
90
|
-
if (presentationStyle.movieParams?.provider === "replicate") {
|
|
91
|
-
return 4;
|
|
92
|
-
}
|
|
93
|
-
*/
|
|
94
97
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
|
|
95
98
|
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
96
99
|
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { mulmoScriptSchema } from "../types/index.js";
|
|
2
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
3
|
+
const validate_1_0 = (script) => {
|
|
4
|
+
if (script.speechParams?.provider) {
|
|
5
|
+
if (typeof script.speechParams.speakers === "object") {
|
|
6
|
+
Object.keys(script.speechParams.speakers).forEach((speakerId) => {
|
|
7
|
+
const speaker = script.speechParams.speakers[speakerId];
|
|
8
|
+
if (!speaker.provider) {
|
|
9
|
+
speaker.provider = script.speechParams.provider;
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
delete script.speechParams.provider;
|
|
14
|
+
}
|
|
15
|
+
return script;
|
|
16
|
+
};
|
|
17
|
+
const validators = [{ from: "1.0", to: "1.1", validator: validate_1_0 }];
|
|
18
|
+
export const MulmoScriptMethods = {
|
|
19
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
20
|
+
validate(script) {
|
|
21
|
+
const validatedScript = validators.reduce((acc, validator) => {
|
|
22
|
+
if (acc.$mulmocast.version === validator.from) {
|
|
23
|
+
const validated = validator.validator(acc);
|
|
24
|
+
validated.$mulmocast.version = validator.to;
|
|
25
|
+
return validated;
|
|
26
|
+
}
|
|
27
|
+
return acc;
|
|
28
|
+
}, script);
|
|
29
|
+
return mulmoScriptSchema.parse(validatedScript);
|
|
30
|
+
},
|
|
31
|
+
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { MulmoStoryboard, StoryToScriptGenerateMode } from "../types/index.js";
|
|
2
|
-
import { LLM } from "../utils/
|
|
2
|
+
import type { LLM } from "../utils/provider2agent.js";
|
|
3
3
|
export declare const storyToScript: ({ story, beatsPerScene, templateName, outdir, fileName, llm, llmModel, generateMode, }: {
|
|
4
4
|
story: MulmoStoryboard;
|
|
5
5
|
beatsPerScene: number;
|
package/lib/types/agent.d.ts
CHANGED
|
@@ -58,8 +58,17 @@ export type ReplicateMovieAgentParams = {
|
|
|
58
58
|
};
|
|
59
59
|
duration?: number;
|
|
60
60
|
};
|
|
61
|
+
export type ReplicateSoundEffectAgentParams = {
|
|
62
|
+
model: `${string}/${string}` | undefined;
|
|
63
|
+
duration?: number;
|
|
64
|
+
};
|
|
65
|
+
export type SoundEffectAgentInputs = AgentPromptInputs & {
|
|
66
|
+
soundEffectFile: string;
|
|
67
|
+
movieFile: string;
|
|
68
|
+
};
|
|
61
69
|
export type GoogleMovieAgentConfig = GoogleImageAgentConfig;
|
|
62
70
|
export type ReplicateMovieAgentConfig = AgentConfig;
|
|
71
|
+
export type ReplicateSoundEffectAgentConfig = AgentConfig;
|
|
63
72
|
export type TTSAgentParams = {
|
|
64
73
|
suppressError: boolean;
|
|
65
74
|
voice: string;
|