mulmocast 0.1.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/assets/templates/akira_comic.json +1 -1
  2. package/assets/templates/ani.json +48 -0
  3. package/assets/templates/ani_ja.json +44 -0
  4. package/assets/templates/characters.json +1 -1
  5. package/assets/templates/children_book.json +1 -1
  6. package/assets/templates/comic_strips.json +1 -1
  7. package/assets/templates/drslump_comic.json +1 -1
  8. package/assets/templates/ghibli_comic.json +1 -1
  9. package/assets/templates/ghibli_image_only.json +1 -1
  10. package/assets/templates/ghibli_shorts.json +2 -3
  11. package/assets/templates/ghost_comic.json +1 -1
  12. package/assets/templates/onepiece_comic.json +1 -1
  13. package/assets/templates/portrait_movie.json +1 -1
  14. package/assets/templates/realistic_movie.json +1 -1
  15. package/assets/templates/sensei_and_taro.json +4 -5
  16. package/assets/templates/shorts.json +1 -1
  17. package/assets/templates/trailer.json +1 -1
  18. package/lib/actions/audio.js +8 -7
  19. package/lib/actions/image_agents.d.ts +53 -98
  20. package/lib/actions/image_agents.js +14 -6
  21. package/lib/actions/images.js +42 -13
  22. package/lib/actions/movie.js +1 -1
  23. package/lib/agents/index.d.ts +2 -1
  24. package/lib/agents/index.js +2 -1
  25. package/lib/agents/movie_replicate_agent.js +18 -5
  26. package/lib/agents/sound_effect_replicate_agent.d.ts +5 -0
  27. package/lib/agents/sound_effect_replicate_agent.js +59 -0
  28. package/lib/cli/commands/tool/scripting/builder.js +1 -1
  29. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -1
  30. package/lib/cli/commands/tool/story_to_script/builder.js +1 -1
  31. package/lib/cli/commands/tool/story_to_script/handler.d.ts +1 -1
  32. package/lib/mcp/server.js +2 -2
  33. package/lib/methods/index.d.ts +1 -0
  34. package/lib/methods/index.js +1 -0
  35. package/lib/methods/mulmo_presentation_style.d.ts +25 -6
  36. package/lib/methods/mulmo_presentation_style.js +33 -30
  37. package/lib/methods/mulmo_script.d.ts +4 -0
  38. package/lib/methods/mulmo_script.js +31 -0
  39. package/lib/tools/story_to_script.d.ts +1 -1
  40. package/lib/types/agent.d.ts +9 -0
  41. package/lib/types/schema.d.ts +727 -554
  42. package/lib/types/schema.js +41 -24
  43. package/lib/types/type.d.ts +4 -4
  44. package/lib/utils/assets.d.ts +18 -0
  45. package/lib/utils/assets.js +101 -0
  46. package/lib/utils/context.d.ts +98 -84
  47. package/lib/utils/context.js +2 -1
  48. package/lib/utils/ffmpeg_utils.js +6 -0
  49. package/lib/utils/file.d.ts +4 -1
  50. package/lib/utils/file.js +3 -5
  51. package/lib/utils/preprocess.d.ts +57 -47
  52. package/lib/utils/preprocess.js +7 -5
  53. package/lib/utils/provider2agent.d.ts +27 -7
  54. package/lib/utils/provider2agent.js +85 -7
  55. package/lib/utils/utils.d.ts +1 -2
  56. package/lib/utils/utils.js +4 -2
  57. package/package.json +7 -7
  58. package/scripts/templates/presentation.json~ +0 -119
@@ -6,7 +6,7 @@ import * as vanilla from "@graphai/vanilla";
6
6
  import { openAIAgent } from "@graphai/openai_agent";
7
7
  import { anthropicAgent } from "@graphai/anthropic_agent";
8
8
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
9
- import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
9
+ import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent } from "../agents/index.js";
10
10
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
11
11
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
12
12
  import { fileCacheAgentFilter } from "../utils/filters.js";
@@ -23,10 +23,14 @@ const movieAgents = {
23
23
  movieGoogleAgent,
24
24
  movieReplicateAgent,
25
25
  };
26
+ const soundEffectAgents = {
27
+ soundEffectReplicateAgent,
28
+ };
26
29
  const defaultAgents = {
27
30
  ...vanillaAgents,
28
31
  ...imageAgents,
29
32
  ...movieAgents,
33
+ ...soundEffectAgents,
30
34
  mediaMockAgent,
31
35
  fileWriteAgent,
32
36
  openAIAgent,
@@ -39,7 +43,6 @@ const beat_graph_data = {
39
43
  nodes: {
40
44
  context: {},
41
45
  htmlImageAgentInfo: {},
42
- movieAgentInfo: {},
43
46
  imageRefs: {},
44
47
  beat: {},
45
48
  __mapIndex: {},
@@ -134,7 +137,7 @@ const beat_graph_data = {
134
137
  },
135
138
  movieGenerator: {
136
139
  if: ":preprocessor.movieFile",
137
- agent: ":movieAgentInfo.agent",
140
+ agent: ":preprocessor.movieAgentInfo.agent",
138
141
  inputs: {
139
142
  onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
140
143
  prompt: ":beat.moviePrompt",
@@ -147,7 +150,7 @@ const beat_graph_data = {
147
150
  mulmoContext: ":context",
148
151
  },
149
152
  params: {
150
- model: ":preprocessor.movieParams.model",
153
+ model: ":preprocessor.movieAgentInfo.movieParams.model",
151
154
  duration: ":beat.duration",
152
155
  canvasSize: ":context.presentationStyle.canvasSize",
153
156
  },
@@ -167,28 +170,59 @@ const beat_graph_data = {
167
170
  defaultValue: {},
168
171
  },
169
172
  audioChecker: {
170
- if: ":preprocessor.movieFile",
171
173
  agent: async (namedInputs) => {
172
- const { hasAudio } = await ffmpegGetMediaDuration(namedInputs.movieFile);
174
+ if (namedInputs.soundEffectFile) {
175
+ return { hasMovieAudio: true };
176
+ }
177
+ const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
178
+ if (!sourceFile) {
179
+ return { hasMovieAudio: false };
180
+ }
181
+ const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
173
182
  return { hasMovieAudio: hasAudio };
174
183
  },
184
+ inputs: {
185
+ onComplete: [":movieGenerator", ":htmlImageGenerator", ":soundEffectGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
186
+ movieFile: ":preprocessor.movieFile",
187
+ imageFile: ":preprocessor.imagePath",
188
+ soundEffectFile: ":preprocessor.soundEffectFile",
189
+ },
190
+ },
191
+ soundEffectGenerator: {
192
+ if: ":preprocessor.soundEffectPrompt",
193
+ agent: ":preprocessor.soundEffectAgentInfo.agentName",
175
194
  inputs: {
176
195
  onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
196
+ prompt: ":preprocessor.soundEffectPrompt",
177
197
  movieFile: ":preprocessor.movieFile",
198
+ soundEffectFile: ":preprocessor.soundEffectFile",
199
+ params: {
200
+ model: ":preprocessor.soundEffectModel",
201
+ duration: ":beat.duration",
202
+ },
203
+ cache: {
204
+ force: [":context.force"],
205
+ file: ":preprocessor.soundEffectFile",
206
+ index: ":__mapIndex",
207
+ sessionType: "soundEffect",
208
+ mulmoContext: ":context",
209
+ },
178
210
  },
179
211
  defaultValue: {},
180
212
  },
181
213
  output: {
182
214
  agent: "copyAgent",
183
215
  inputs: {
184
- onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
216
+ onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker", ":soundEffectGenerator"], // to wait for imageFromMovie to finish
185
217
  imageFile: ":preprocessor.imagePath",
186
218
  movieFile: ":preprocessor.movieFile",
219
+ soundEffectFile: ":preprocessor.soundEffectFile",
187
220
  hasMovieAudio: ":audioChecker.hasMovieAudio",
188
221
  },
189
222
  output: {
190
223
  imageFile: ".imageFile",
191
224
  movieFile: ".movieFile",
225
+ soundEffectFile: ".soundEffectFile",
192
226
  hasMovieAudio: ".hasMovieAudio",
193
227
  },
194
228
  isResult: true,
@@ -201,7 +235,6 @@ const graph_data = {
201
235
  nodes: {
202
236
  context: {},
203
237
  htmlImageAgentInfo: {},
204
- movieAgentInfo: {},
205
238
  outputStudioFilePath: {},
206
239
  imageRefs: {},
207
240
  map: {
@@ -210,7 +243,6 @@ const graph_data = {
210
243
  rows: ":context.studio.script.beats",
211
244
  context: ":context",
212
245
  htmlImageAgentInfo: ":htmlImageAgentInfo",
213
- movieAgentInfo: ":movieAgentInfo",
214
246
  imageRefs: ":imageRefs",
215
247
  },
216
248
  isResult: true,
@@ -284,7 +316,7 @@ export const graphOption = async (context, settings) => {
284
316
  {
285
317
  name: "fileCacheAgentFilter",
286
318
  agent: fileCacheAgentFilter,
287
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
319
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent", "soundEffectGenerator"],
288
320
  },
289
321
  ],
290
322
  taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
@@ -314,9 +346,6 @@ const prepareGenerateImages = async (context) => {
314
346
  const injections = {
315
347
  context,
316
348
  htmlImageAgentInfo,
317
- movieAgentInfo: {
318
- agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
319
- },
320
349
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
321
350
  imageRefs,
322
351
  };
@@ -162,7 +162,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
162
162
  beatTimestamps.push(timestamp);
163
163
  return timestamp; // Skip voice-over beats.
164
164
  }
165
- const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
165
+ const sourceFile = studioBeat.soundEffectFile ?? studioBeat.movieFile ?? studioBeat.imageFile;
166
166
  assert(!!sourceFile, `studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
167
167
  assert(!!studioBeat.duration, `studioBeat.duration is not set: index=${index}`);
168
168
  const extraPadding = (() => {
@@ -10,8 +10,9 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
10
10
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
11
11
  import ttsOpenaiAgent from "./tts_openai_agent.js";
12
12
  import validateSchemaAgent from "./validate_schema_agent.js";
13
+ import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
13
14
  import { browserlessAgent } from "@graphai/browserless_agent";
14
15
  import { textInputAgent } from "@graphai/input_agents";
15
16
  import { openAIAgent } from "@graphai/openai_agent";
16
17
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
17
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
18
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
@@ -10,9 +10,10 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
10
10
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
11
11
  import ttsOpenaiAgent from "./tts_openai_agent.js";
12
12
  import validateSchemaAgent from "./validate_schema_agent.js";
13
+ import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
13
14
  import { browserlessAgent } from "@graphai/browserless_agent";
14
15
  import { textInputAgent } from "@graphai/input_agents";
15
16
  import { openAIAgent } from "@graphai/openai_agent";
16
17
  // import * as vanilla from "@graphai/vanilla";
17
18
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
18
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
19
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, tavilySearchAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, soundEffectReplicateAgent, };
@@ -1,6 +1,7 @@
1
1
  import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
+ import { provider2MovieAgent } from "../utils/provider2agent.js";
4
5
  async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
5
6
  const replicate = new Replicate({
6
7
  auth: apiKey,
@@ -10,6 +11,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
10
11
  duration,
11
12
  image: undefined,
12
13
  start_image: undefined,
14
+ first_frame_image: undefined,
13
15
  aspect_ratio: aspectRatio, // only for bytedance/seedance-1-lite
14
16
  // resolution: "720p", // only for bytedance/seedance-1-lite
15
17
  // fps: 24, // only for bytedance/seedance-1-lite
@@ -21,15 +23,19 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
21
23
  if (imagePath) {
22
24
  const buffer = readFileSync(imagePath);
23
25
  const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
24
- if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
25
- input.start_image = base64Image;
26
+ const start_image = provider2MovieAgent.replicate.modelParams[model]?.start_image;
27
+ if (start_image === "first_frame_image" || start_image === "image" || start_image === "start_image") {
28
+ input[start_image] = base64Image;
29
+ }
30
+ else if (start_image === undefined) {
31
+ throw new Error(`Model ${model} does not support image-to-video generation`);
26
32
  }
27
33
  else {
28
34
  input.image = base64Image;
29
35
  }
30
36
  }
31
37
  try {
32
- const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
38
+ const output = await replicate.run(model, { input });
33
39
  // Download the generated video
34
40
  if (output && typeof output === "object" && "url" in output) {
35
41
  const videoUrl = output.url();
@@ -61,13 +67,20 @@ export const getAspectRatio = (canvasSize) => {
61
67
  export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
62
68
  const { prompt, imagePath } = namedInputs;
63
69
  const aspectRatio = getAspectRatio(params.canvasSize);
64
- const duration = params.duration ?? 5;
70
+ const model = params.model ?? provider2MovieAgent.replicate.defaultModel;
71
+ if (!provider2MovieAgent.replicate.modelParams[model]) {
72
+ throw new Error(`Model ${model} is not supported`);
73
+ }
74
+ const duration = params.duration ?? provider2MovieAgent.replicate.modelParams[model].durations[0] ?? 5;
75
+ if (!provider2MovieAgent.replicate.modelParams[model].durations.includes(duration)) {
76
+ throw new Error(`Duration ${duration} is not supported for model ${model}. Supported durations: ${provider2MovieAgent.replicate.modelParams[model].durations.join(", ")}`);
77
+ }
65
78
  const apiKey = config?.apiKey;
66
79
  if (!apiKey) {
67
80
  throw new Error("REPLICATE_API_TOKEN environment variable is required");
68
81
  }
69
82
  try {
70
- const buffer = await generateMovie(params.model, apiKey, prompt, imagePath, aspectRatio, duration);
83
+ const buffer = await generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration);
71
84
  if (buffer) {
72
85
  return { buffer };
73
86
  }
@@ -0,0 +1,5 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { AgentBufferResult, SoundEffectAgentInputs, ReplicateSoundEffectAgentParams, ReplicateSoundEffectAgentConfig } from "../types/agent.js";
3
+ export declare const soundEffectReplicateAgent: AgentFunction<ReplicateSoundEffectAgentParams, AgentBufferResult, SoundEffectAgentInputs, ReplicateSoundEffectAgentConfig>;
4
+ declare const soundEffectReplicateAgentInfo: AgentFunctionInfo;
5
+ export default soundEffectReplicateAgentInfo;
@@ -0,0 +1,59 @@
1
+ import { readFileSync } from "fs";
2
+ import { GraphAILogger } from "graphai";
3
+ import Replicate from "replicate";
4
+ import { provider2SoundEffectAgent } from "../utils/provider2agent.js";
5
/**
 * GraphAI agent that sends an existing movie file plus a text prompt to a
 * Replicate model and returns the generated media as a Buffer.
 *
 * Inputs (destructured from the single argument):
 * - namedInputs.prompt:    text prompt forwarded to the model.
 * - namedInputs.movieFile: path of the movie read from disk and uploaded as a
 *                          base64 data URI.
 * - params.model:          optional model key; falls back to
 *                          provider2SoundEffectAgent.replicate.defaultModel.
 * - params.duration:       forwarded to the model unchanged.
 * - config.apiKey:         Replicate API token (required).
 *
 * Returns `{ buffer }` with the downloaded result, or `undefined` when the
 * Replicate output is not an object exposing `url`.
 *
 * Throws an Error when the API token or the movie file path is missing, or
 * when the download response is not OK. Failures while reading the file,
 * running the model or downloading are logged and rethrown unchanged.
 */
export const soundEffectReplicateAgent = async ({ namedInputs, params, config }) => {
    const { prompt, movieFile } = namedInputs;
    const apiKey = config?.apiKey;
    const model = params?.model ?? provider2SoundEffectAgent.replicate.defaultModel;
    if (!apiKey) {
        throw new Error("REPLICATE_API_TOKEN environment variable is required");
    }
    // Fail with an actionable message instead of the opaque argument-type
    // error readFileSync raises when the path is undefined.
    if (!movieFile) {
        throw new Error("soundEffectReplicateAgent: movieFile is required to generate a sound effect");
    }
    const replicate = new Replicate({
        auth: apiKey,
    });
    try {
        const movieData = readFileSync(movieFile);
        // NOTE(review): the data URI is always labelled video/quicktime, whatever
        // the container of movieFile actually is — confirm the model ignores it.
        const videoDataUri = `data:video/quicktime;base64,${movieData.toString("base64")}`;
        const input = {
            video: videoDataUri,
            prompt,
            duration: params?.duration,
            // seed: -1,
            // num_steps: 25,
            // cfg_strength: 4.5,
            // negative_prompt: "music"
        };
        // Prefer the identifier registered for this model key; otherwise pass
        // the key itself to Replicate.
        const modelIdentifier = provider2SoundEffectAgent.replicate.modelParams[model]?.identifier ?? model;
        const output = await replicate.run(modelIdentifier, {
            input,
        });
        if (output && typeof output === "object" && "url" in output) {
            const resultUrl = output.url();
            const response = await fetch(resultUrl);
            if (!response.ok) {
                throw new Error(`Error downloading video: ${response.status} - ${response.statusText}`);
            }
            const arrayBuffer = await response.arrayBuffer();
            return { buffer: Buffer.from(arrayBuffer) };
        }
        return undefined;
    }
    catch (error) {
        // Anything can be thrown in JavaScript; do not assume an Error instance.
        const reason = error instanceof Error ? error.message : String(error);
        GraphAILogger.info("Failed to generate sound effect:", reason);
        throw error;
    }
};
47
/**
 * GraphAI registration record for the Replicate sound-effect agent.
 * The same implementation is used for both `agent` and `mock`, and no
 * samples are provided.
 */
const soundEffectReplicateAgentInfo = {
    name: "soundEffectReplicateAgent",
    agent: soundEffectReplicateAgent,
    mock: soundEffectReplicateAgent,
    samples: [],
    description: "Replicate Sound Effect agent (movie to movie)",
    category: ["movie"],
    author: "Receptron Team",
    repository: "https://github.com/receptron/mulmocast-cli/",
    license: "MIT",
    // Environment variable named by the agent's missing-token error message.
    environmentVariables: ["REPLICATE_API_TOKEN"],
};
59
+ export default soundEffectReplicateAgentInfo;
@@ -1,4 +1,4 @@
1
- import { llm } from "../../../../utils/utils.js";
1
+ import { llm } from "../../../../utils/provider2agent.js";
2
2
  import { getAvailableTemplates } from "../../../../utils/file.js";
3
3
  const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
4
4
  export const builder = (yargs) => {
@@ -1,5 +1,5 @@
1
1
  import { ToolCliArgs } from "../../../../types/cli_types.js";
2
- import { LLM } from "../../../../utils/utils.js";
2
+ import type { LLM } from "../../../../utils/provider2agent.js";
3
3
  export declare const handler: (argv: ToolCliArgs<{
4
4
  o?: string;
5
5
  b?: string;
@@ -1,5 +1,5 @@
1
1
  import { getAvailableTemplates } from "../../../../utils/file.js";
2
- import { llm } from "../../../../utils/utils.js";
2
+ import { llm } from "../../../../utils/provider2agent.js";
3
3
  import { storyToScriptGenerateMode } from "../../../../utils/const.js";
4
4
  const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
5
5
  export const builder = (yargs) => {
@@ -1,5 +1,5 @@
1
1
  import { ToolCliArgs } from "../../../../types/cli_types.js";
2
- import { LLM } from "../../../../utils/utils.js";
2
+ import type { LLM } from "../../../../utils/provider2agent.js";
3
3
  export declare const handler: (argv: ToolCliArgs<{
4
4
  o?: string;
5
5
  b?: string;
package/lib/mcp/server.js CHANGED
@@ -11,7 +11,7 @@ import { audio, images, movie, captions, pdf } from "../actions/index.js";
11
11
  import { initializeContext, runTranslateIfNeeded } from "../cli/helpers.js";
12
12
  import { outDirName } from "../utils/const.js";
13
13
  import { resolveDirPath, mkdir, generateTimestampedFileName } from "../utils/file.js";
14
- import { mulmoScriptSchema } from "../types/schema.js";
14
+ import { MulmoScriptMethods } from "../methods/index.js";
15
15
  const __filename = fileURLToPath(import.meta.url);
16
16
  const __dirname = path.dirname(__filename);
17
17
  // Load MulmoScript JSON Schema from file
@@ -83,7 +83,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
83
83
  }
84
84
  const { cmd, mulmoScript, options = {}, } = args;
85
85
  // Validate MulmoScript schema
86
- const validatedScript = mulmoScriptSchema.parse(mulmoScript);
86
+ const validatedScript = MulmoScriptMethods.validate(mulmoScript);
87
87
  // Save MulmoScript to output directory
88
88
  const filePath = await saveMulmoScriptToOutput(validatedScript);
89
89
  // Create argv-like object for CLI compatibility
@@ -3,3 +3,4 @@ export * from "./mulmo_script_template.js";
3
3
  export * from "./mulmo_studio_context.js";
4
4
  export * from "./mulmo_media_source.js";
5
5
  export * from "./mulmo_beat.js";
6
+ export * from "./mulmo_script.js";
@@ -3,3 +3,4 @@ export * from "./mulmo_script_template.js";
3
3
  export * from "./mulmo_studio_context.js";
4
4
  export * from "./mulmo_media_source.js";
5
5
  export * from "./mulmo_beat.js";
6
+ export * from "./mulmo_script.js";
@@ -1,18 +1,37 @@
1
1
  import "dotenv/config";
2
- import { MulmoCanvasDimension, MulmoBeat, SpeechOptions, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
2
+ import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
3
3
  export declare const MulmoPresentationStyleMethods: {
4
4
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
5
- getSpeechProvider(presentationStyle: MulmoPresentationStyle): Text2SpeechProvider;
6
5
  getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
7
6
  getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
8
- getSpeechOptions(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeechOptions | undefined;
7
+ getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
9
8
  getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
10
- getTTSProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
11
9
  getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
12
- getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
13
10
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
14
11
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
15
- getMovieAgent(presentationStyle: MulmoPresentationStyle): string;
12
+ getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
13
+ agent: string;
14
+ movieParams: {
15
+ speed?: number | undefined;
16
+ provider?: string | undefined;
17
+ model?: string | undefined;
18
+ fillOption?: {
19
+ style: "aspectFit" | "aspectFill";
20
+ } | undefined;
21
+ transition?: {
22
+ type: "fade" | "slideout_left";
23
+ duration: number;
24
+ } | undefined;
25
+ };
26
+ };
27
+ getSoundEffectAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
28
+ agentName: string;
29
+ defaultModel: import("../utils/provider2agent.js").ReplicateModel;
30
+ models: import("../utils/provider2agent.js").ReplicateModel[];
31
+ modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
32
+ identifier?: `${string}/${string}:${string}`;
33
+ }>;
34
+ };
16
35
  getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
17
36
  getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
18
37
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
@@ -1,7 +1,8 @@
1
1
  import "dotenv/config";
2
+ import { isNull } from "graphai";
2
3
  import { userAssert } from "../utils/utils.js";
3
- import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
4
- import { defaultProviders, provider2ImageAgent, provider2MovieAgent, provider2LLMAgent } from "../utils/provider2agent.js";
4
+ import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
5
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, defaultProviders, } from "../utils/provider2agent.js";
5
6
  const defaultTextSlideStyles = [
6
7
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
7
8
  "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
@@ -20,14 +21,10 @@ export const MulmoPresentationStyleMethods = {
20
21
  getCanvasSize(presentationStyle) {
21
22
  return mulmoCanvasDimensionSchema.parse(presentationStyle.canvasSize);
22
23
  },
23
- getSpeechProvider(presentationStyle) {
24
- return text2SpeechProviderSchema.parse(presentationStyle.speechParams?.provider);
25
- },
26
24
  getAllSpeechProviders(presentationStyle) {
27
25
  const providers = new Set();
28
- const defaultProvider = this.getSpeechProvider(presentationStyle);
29
26
  Object.values(presentationStyle.speechParams.speakers).forEach((speaker) => {
30
- const provider = speaker.provider ?? defaultProvider;
27
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
31
28
  providers.add(provider);
32
29
  });
33
30
  return providers;
@@ -39,27 +36,27 @@ export const MulmoPresentationStyleMethods = {
39
36
  // This code allows us to support both string and array of strings for cssStyles
40
37
  return [...defaultTextSlideStyles, ...[styles], ...[extraStyles]].flat().join("\n");
41
38
  },
42
- getSpeechOptions(presentationStyle, beat) {
43
- return { ...presentationStyle.speechParams.speakers[beat.speaker].speechOptions, ...beat.speechOptions };
39
+ getDefaultSpeaker(presentationStyle) {
40
+ const speakers = presentationStyle.speechParams.speakers ?? {};
41
+ const keys = Object.keys(speakers).sort();
42
+ userAssert(keys.length !== 0, "presentationStyle.speechParams.speakers is not set!!");
43
+ const defaultSpeaker = keys.find((key) => speakers[key].isDefault);
44
+ if (!isNull(defaultSpeaker)) {
45
+ return defaultSpeaker;
46
+ }
47
+ return keys[0];
44
48
  },
45
49
  getSpeaker(presentationStyle, beat) {
46
50
  userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
47
- userAssert(!!beat?.speaker, "beat.speaker is not set");
48
- const speaker = presentationStyle.speechParams.speakers[beat.speaker];
49
- userAssert(!!speaker, `speaker is not set: speaker "${beat.speaker}"`);
51
+ const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
52
+ userAssert(!!speakerId, "beat.speaker and default speaker is not set");
53
+ const speaker = presentationStyle.speechParams.speakers[speakerId];
54
+ userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
50
55
  return speaker;
51
56
  },
52
- getTTSProvider(presentationStyle, beat) {
53
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
54
- return speaker.provider ?? presentationStyle.speechParams.provider;
55
- },
56
57
  getTTSModel(presentationStyle, beat) {
57
58
  const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
58
- return speaker.model ?? presentationStyle.speechParams.model;
59
- },
60
- getVoiceId(presentationStyle, beat) {
61
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
62
- return speaker.voiceId;
59
+ return speaker.model;
63
60
  },
64
61
  getText2ImageProvider(provider) {
65
62
  return text2ImageProviderSchema.parse(provider);
@@ -80,17 +77,23 @@ export const MulmoPresentationStyleMethods = {
80
77
  imageParams: { ...defaultImageParams, ...imageParams },
81
78
  };
82
79
  },
83
- // Determine movie agent based on provider
84
- getMovieAgent(presentationStyle) {
85
- const movieProvider = (presentationStyle.movieParams?.provider ?? defaultProviders.text2movie);
86
- return provider2MovieAgent[movieProvider].agentName;
80
+ getMovieAgentInfo(presentationStyle, beat) {
81
+ const movieParams = { ...presentationStyle.movieParams, ...beat?.movieParams };
82
+ const movieProvider = text2MovieProviderSchema.parse(movieParams?.provider);
83
+ const agentInfo = provider2MovieAgent[movieProvider];
84
+ return {
85
+ agent: agentInfo.agentName,
86
+ movieParams,
87
+ };
88
+ },
89
+ getSoundEffectAgentInfo(presentationStyle, beat) {
90
+ const soundEffectProvider = (beat.soundEffectParams?.provider ??
91
+ presentationStyle.soundEffectParams?.provider ??
92
+ defaultProviders.soundEffect);
93
+ const agentInfo = provider2SoundEffectAgent[soundEffectProvider];
94
+ return agentInfo;
87
95
  },
88
96
  getConcurrency(presentationStyle) {
89
- /*
90
- if (presentationStyle.movieParams?.provider === "replicate") {
91
- return 4;
92
- }
93
- */
94
97
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
95
98
  if (imageAgentInfo.imageParams.provider === "openai") {
96
99
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -0,0 +1,4 @@
1
+ import { MulmoScript } from "../types/index.js";
2
+ export declare const MulmoScriptMethods: {
3
+ validate(script: any): MulmoScript;
4
+ };
@@ -0,0 +1,31 @@
1
+ import { mulmoScriptSchema } from "../types/index.js";
2
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
3
+ const validate_1_0 = (script) => {
4
+ if (script.speechParams?.provider) {
5
+ if (typeof script.speechParams.speakers === "object") {
6
+ Object.keys(script.speechParams.speakers).forEach((speakerId) => {
7
+ const speaker = script.speechParams.speakers[speakerId];
8
+ if (!speaker.provider) {
9
+ speaker.provider = script.speechParams.provider;
10
+ }
11
+ });
12
+ }
13
+ delete script.speechParams.provider;
14
+ }
15
+ return script;
16
+ };
17
// Ordered migration steps. A step runs only when the script's declared
// version equals `from`; afterwards the version is bumped to `to`.
const validators = [{ from: "1.0", to: "1.1", validator: validate_1_0 }];
export const MulmoScriptMethods = {
    /**
     * Applies every matching migration step to `script`, then parses the
     * result with `mulmoScriptSchema`.
     *
     * A script that is nullish or has no `$mulmocast` header skips all
     * migrations and goes straight to the schema, so the caller gets the
     * schema's validation error rather than a TypeError from the version lookup.
     *
     * @param {any} script - raw script object; migration steps may mutate it.
     * @returns the value returned by `mulmoScriptSchema.parse`.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    validate(script) {
        const migrated = validators.reduce((current, step) => {
            if (current?.$mulmocast?.version !== step.from) {
                return current;
            }
            const upgraded = step.validator(current);
            upgraded.$mulmocast.version = step.to;
            return upgraded;
        }, script);
        return mulmoScriptSchema.parse(migrated);
    },
};
@@ -1,5 +1,5 @@
1
1
  import { MulmoStoryboard, StoryToScriptGenerateMode } from "../types/index.js";
2
- import { LLM } from "../utils/utils.js";
2
+ import type { LLM } from "../utils/provider2agent.js";
3
3
  export declare const storyToScript: ({ story, beatsPerScene, templateName, outdir, fileName, llm, llmModel, generateMode, }: {
4
4
  story: MulmoStoryboard;
5
5
  beatsPerScene: number;
@@ -58,8 +58,17 @@ export type ReplicateMovieAgentParams = {
58
58
  };
59
59
  duration?: number;
60
60
  };
61
+ export type ReplicateSoundEffectAgentParams = {
62
+ model: `${string}/${string}` | undefined;
63
+ duration?: number;
64
+ };
65
+ export type SoundEffectAgentInputs = AgentPromptInputs & {
66
+ soundEffectFile: string;
67
+ movieFile: string;
68
+ };
61
69
  export type GoogleMovieAgentConfig = GoogleImageAgentConfig;
62
70
  export type ReplicateMovieAgentConfig = AgentConfig;
71
+ export type ReplicateSoundEffectAgentConfig = AgentConfig;
63
72
  export type TTSAgentParams = {
64
73
  suppressError: boolean;
65
74
  voice: string;