mulmocast 0.1.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/assets/templates/akira_comic.json +1 -1
  2. package/assets/templates/ani.json +1 -1
  3. package/assets/templates/ani_ja.json +2 -3
  4. package/assets/templates/characters.json +1 -1
  5. package/assets/templates/children_book.json +1 -1
  6. package/assets/templates/comic_strips.json +1 -1
  7. package/assets/templates/drslump_comic.json +1 -1
  8. package/assets/templates/ghibli_comic.json +1 -1
  9. package/assets/templates/ghibli_image_only.json +1 -1
  10. package/assets/templates/ghibli_shorts.json +2 -3
  11. package/assets/templates/ghost_comic.json +1 -1
  12. package/assets/templates/onepiece_comic.json +1 -1
  13. package/assets/templates/portrait_movie.json +1 -1
  14. package/assets/templates/realistic_movie.json +1 -1
  15. package/assets/templates/sensei_and_taro.json +4 -5
  16. package/assets/templates/shorts.json +1 -1
  17. package/assets/templates/trailer.json +1 -1
  18. package/lib/actions/audio.js +6 -7
  19. package/lib/actions/image_agents.d.ts +25 -76
  20. package/lib/actions/image_agents.js +11 -3
  21. package/lib/actions/images.js +36 -4
  22. package/lib/actions/movie.js +1 -1
  23. package/lib/agents/index.d.ts +2 -1
  24. package/lib/agents/index.js +2 -1
  25. package/lib/agents/movie_replicate_agent.js +17 -5
  26. package/lib/agents/sound_effect_replicate_agent.d.ts +5 -0
  27. package/lib/agents/sound_effect_replicate_agent.js +59 -0
  28. package/lib/mcp/server.js +2 -2
  29. package/lib/methods/index.d.ts +1 -0
  30. package/lib/methods/index.js +1 -0
  31. package/lib/methods/mulmo_presentation_style.d.ts +10 -5
  32. package/lib/methods/mulmo_presentation_style.js +24 -20
  33. package/lib/methods/mulmo_script.d.ts +4 -0
  34. package/lib/methods/mulmo_script.js +31 -0
  35. package/lib/types/agent.d.ts +9 -0
  36. package/lib/types/schema.d.ts +396 -244
  37. package/lib/types/schema.js +22 -12
  38. package/lib/types/type.d.ts +2 -3
  39. package/lib/utils/assets.d.ts +18 -0
  40. package/lib/utils/assets.js +101 -0
  41. package/lib/utils/context.d.ts +25 -12
  42. package/lib/utils/context.js +2 -1
  43. package/lib/utils/file.d.ts +4 -1
  44. package/lib/utils/file.js +3 -5
  45. package/lib/utils/preprocess.d.ts +20 -11
  46. package/lib/utils/preprocess.js +7 -5
  47. package/lib/utils/provider2agent.d.ts +19 -1
  48. package/lib/utils/provider2agent.js +73 -0
  49. package/lib/utils/utils.js +3 -0
  50. package/package.json +1 -1
@@ -0,0 +1,59 @@
1
+ import { readFileSync } from "fs";
2
+ import { GraphAILogger } from "graphai";
3
+ import Replicate from "replicate";
4
+ import { provider2SoundEffectAgent } from "../utils/provider2agent.js";
5
/**
 * Agent that sends a movie file and a text prompt to a Replicate-hosted model
 * and returns the bytes of the file the model produces.
 *
 * The movie at `namedInputs.movieFile` is read from disk and sent inline as a
 * base64 data URI. Only `video`, `prompt` and `duration` are passed to the
 * model; its other inputs (seed, num_steps, cfg_strength, negative_prompt)
 * are not set here.
 *
 * @param context - Agent call context.
 * @param context.namedInputs - `prompt` (text prompt) and `movieFile` (path of the movie to read).
 * @param context.params - Optional `model` and `duration`; `model` falls back to
 *   `provider2SoundEffectAgent.replicate.defaultModel`.
 * @param context.config - Must carry `apiKey`, used as the Replicate auth token.
 * @returns `{ buffer }` holding the downloaded bytes, or `undefined` when the
 *   model output is not an object exposing `url`.
 * @throws Error when `config.apiKey` is missing, when the download response is
 *   not ok, or when the Replicate call fails (logged, then rethrown unchanged).
 */
export const soundEffectReplicateAgent = async ({ namedInputs, params, config }) => {
    // Fail fast on missing credentials before touching the model table or the file system.
    const apiKey = config?.apiKey;
    if (!apiKey) {
        throw new Error("REPLICATE_API_TOKEN environment variable is required");
    }
    const { prompt, movieFile } = namedInputs;
    // `params` may be omitted entirely; treat that the same as "no overrides".
    const model = params?.model ?? provider2SoundEffectAgent.replicate.defaultModel;
    const replicate = new Replicate({
        auth: apiKey,
    });
    const buffer = readFileSync(movieFile);
    const uri = `data:video/quicktime;base64,${buffer.toString("base64")}`;
    const input = {
        video: uri,
        prompt,
        duration: params?.duration,
    };
    try {
        // Use the identifier registered for the model when one exists, otherwise the model name itself.
        const modelIdentifier = provider2SoundEffectAgent.replicate.modelParams[model]?.identifier ?? model;
        const output = await replicate.run(modelIdentifier, {
            input,
        });
        if (output && typeof output === "object" && "url" in output) {
            const videoUrl = output.url();
            const videoResponse = await fetch(videoUrl);
            if (!videoResponse.ok) {
                throw new Error(`Error downloading video: ${videoResponse.status} - ${videoResponse.statusText}`);
            }
            const arrayBuffer = await videoResponse.arrayBuffer();
            return { buffer: Buffer.from(arrayBuffer) };
        }
        return undefined;
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error, so do not assume `.message` exists.
        const message = error instanceof Error ? error.message : String(error);
        GraphAILogger.info("Failed to generate sound effect:", message);
        throw error;
    }
};
/**
 * Agent registration metadata for `soundEffectReplicateAgent`.
 * Note that `mock` points at the real agent, so a mock run also calls Replicate.
 */
const soundEffectReplicateAgentInfo = {
    name: "soundEffectReplicateAgent",
    agent: soundEffectReplicateAgent,
    mock: soundEffectReplicateAgent,
    samples: [],
    description: "Replicate Sound Effect agent (movie to movie)",
    category: ["movie"],
    author: "Receptron Team",
    repository: "https://github.com/receptron/mulmocast-cli/",
    license: "MIT",
    environmentVariables: ["REPLICATE_API_TOKEN"],
};
export default soundEffectReplicateAgentInfo;
package/lib/mcp/server.js CHANGED
@@ -11,7 +11,7 @@ import { audio, images, movie, captions, pdf } from "../actions/index.js";
11
11
  import { initializeContext, runTranslateIfNeeded } from "../cli/helpers.js";
12
12
  import { outDirName } from "../utils/const.js";
13
13
  import { resolveDirPath, mkdir, generateTimestampedFileName } from "../utils/file.js";
14
- import { mulmoScriptSchema } from "../types/schema.js";
14
+ import { MulmoScriptMethods } from "../methods/index.js";
15
15
  const __filename = fileURLToPath(import.meta.url);
16
16
  const __dirname = path.dirname(__filename);
17
17
  // Load MulmoScript JSON Schema from file
@@ -83,7 +83,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
83
83
  }
84
84
  const { cmd, mulmoScript, options = {}, } = args;
85
85
  // Validate MulmoScript schema
86
- const validatedScript = mulmoScriptSchema.parse(mulmoScript);
86
+ const validatedScript = MulmoScriptMethods.validate(mulmoScript);
87
87
  // Save MulmoScript to output directory
88
88
  const filePath = await saveMulmoScriptToOutput(validatedScript);
89
89
  // Create argv-like object for CLI compatibility
@@ -3,3 +3,4 @@ export * from "./mulmo_script_template.js";
3
3
  export * from "./mulmo_studio_context.js";
4
4
  export * from "./mulmo_media_source.js";
5
5
  export * from "./mulmo_beat.js";
6
+ export * from "./mulmo_script.js";
@@ -3,3 +3,4 @@ export * from "./mulmo_script_template.js";
3
3
  export * from "./mulmo_studio_context.js";
4
4
  export * from "./mulmo_media_source.js";
5
5
  export * from "./mulmo_beat.js";
6
+ export * from "./mulmo_script.js";
@@ -1,15 +1,12 @@
1
1
  import "dotenv/config";
2
- import { MulmoCanvasDimension, MulmoBeat, SpeechOptions, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
2
+ import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
3
3
  export declare const MulmoPresentationStyleMethods: {
4
4
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
5
- getSpeechProvider(presentationStyle: MulmoPresentationStyle): Text2SpeechProvider;
6
5
  getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
7
6
  getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
8
- getSpeechOptions(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeechOptions | undefined;
7
+ getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
9
8
  getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
10
- getTTSProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
11
9
  getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
12
- getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
13
10
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
14
11
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
15
12
  getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -27,6 +24,14 @@ export declare const MulmoPresentationStyleMethods: {
27
24
  } | undefined;
28
25
  };
29
26
  };
27
+ getSoundEffectAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
28
+ agentName: string;
29
+ defaultModel: import("../utils/provider2agent.js").ReplicateModel;
30
+ models: import("../utils/provider2agent.js").ReplicateModel[];
31
+ modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
32
+ identifier?: `${string}/${string}:${string}`;
33
+ }>;
34
+ };
30
35
  getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
31
36
  getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
32
37
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
@@ -1,7 +1,8 @@
1
1
  import "dotenv/config";
2
+ import { isNull } from "graphai";
2
3
  import { userAssert } from "../utils/utils.js";
3
4
  import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
4
- import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent } from "../utils/provider2agent.js";
5
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, defaultProviders, } from "../utils/provider2agent.js";
5
6
  const defaultTextSlideStyles = [
6
7
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
7
8
  "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
@@ -20,14 +21,10 @@ export const MulmoPresentationStyleMethods = {
20
21
  getCanvasSize(presentationStyle) {
21
22
  return mulmoCanvasDimensionSchema.parse(presentationStyle.canvasSize);
22
23
  },
23
- getSpeechProvider(presentationStyle) {
24
- return text2SpeechProviderSchema.parse(presentationStyle.speechParams?.provider);
25
- },
26
24
  getAllSpeechProviders(presentationStyle) {
27
25
  const providers = new Set();
28
- const defaultProvider = this.getSpeechProvider(presentationStyle);
29
26
  Object.values(presentationStyle.speechParams.speakers).forEach((speaker) => {
30
- const provider = speaker.provider ?? defaultProvider;
27
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
31
28
  providers.add(provider);
32
29
  });
33
30
  return providers;
@@ -39,27 +36,27 @@ export const MulmoPresentationStyleMethods = {
39
36
  // This code allows us to support both string and array of strings for cssStyles
40
37
  return [...defaultTextSlideStyles, ...[styles], ...[extraStyles]].flat().join("\n");
41
38
  },
42
- getSpeechOptions(presentationStyle, beat) {
43
- return { ...presentationStyle.speechParams.speakers[beat.speaker].speechOptions, ...beat.speechOptions };
39
+ getDefaultSpeaker(presentationStyle) {
40
+ const speakers = presentationStyle.speechParams.speakers ?? {};
41
+ const keys = Object.keys(speakers).sort();
42
+ userAssert(keys.length !== 0, "presentationStyle.speechParams.speakers is not set!!");
43
+ const defaultSpeaker = keys.find((key) => speakers[key].isDefault);
44
+ if (!isNull(defaultSpeaker)) {
45
+ return defaultSpeaker;
46
+ }
47
+ return keys[0];
44
48
  },
45
49
  getSpeaker(presentationStyle, beat) {
46
50
  userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
47
- userAssert(!!beat?.speaker, "beat.speaker is not set");
48
- const speaker = presentationStyle.speechParams.speakers[beat.speaker];
49
- userAssert(!!speaker, `speaker is not set: speaker "${beat.speaker}"`);
51
+ const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
52
+ userAssert(!!speakerId, "beat.speaker and default speaker is not set");
53
+ const speaker = presentationStyle.speechParams.speakers[speakerId];
54
+ userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
50
55
  return speaker;
51
56
  },
52
- getTTSProvider(presentationStyle, beat) {
53
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
54
- return speaker.provider ?? presentationStyle.speechParams.provider;
55
- },
56
57
  getTTSModel(presentationStyle, beat) {
57
58
  const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
58
- return speaker.model ?? presentationStyle.speechParams.model;
59
- },
60
- getVoiceId(presentationStyle, beat) {
61
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
62
- return speaker.voiceId;
59
+ return speaker.model;
63
60
  },
64
61
  getText2ImageProvider(provider) {
65
62
  return text2ImageProviderSchema.parse(provider);
@@ -89,6 +86,13 @@ export const MulmoPresentationStyleMethods = {
89
86
  movieParams,
90
87
  };
91
88
  },
89
+ getSoundEffectAgentInfo(presentationStyle, beat) {
90
+ const soundEffectProvider = (beat.soundEffectParams?.provider ??
91
+ presentationStyle.soundEffectParams?.provider ??
92
+ defaultProviders.soundEffect);
93
+ const agentInfo = provider2SoundEffectAgent[soundEffectProvider];
94
+ return agentInfo;
95
+ },
92
96
  getConcurrency(presentationStyle) {
93
97
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
94
98
  if (imageAgentInfo.imageParams.provider === "openai") {
@@ -0,0 +1,4 @@
1
+ import { MulmoScript } from "../types/index.js";
2
export declare const MulmoScriptMethods: {
    /**
     * Validates a raw script value and returns it typed as `MulmoScript`.
     *
     * @param script - Untrusted input of any shape; `unknown` accepts every
     *   argument `any` did without disabling type checking at the call site.
     * @returns The script typed as `MulmoScript`.
     */
    validate(script: unknown): MulmoScript;
};
@@ -0,0 +1,31 @@
1
+ import { mulmoScriptSchema } from "../types/index.js";
2
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
3
/**
 * Migration step for scripts at version "1.0": moves the script-wide
 * `speechParams.provider` onto every speaker that has no provider of its own,
 * then removes the script-wide field.
 *
 * The script is modified in place and the same object is returned.
 * A script without `speechParams.provider` is returned untouched.
 *
 * @param script - Raw, not yet schema-validated script object.
 * @returns The same `script` reference, migrated.
 */
const validate_1_0 = (script) => {
    const speechParams = script?.speechParams;
    if (!speechParams?.provider) {
        return script;
    }
    const { provider, speakers } = speechParams;
    // `typeof null` is "object", so null has to be excluded explicitly before iterating.
    if (speakers !== null && typeof speakers === "object") {
        for (const speaker of Object.values(speakers)) {
            // Input is unvalidated at this point: skip entries that are not objects instead of crashing.
            if (speaker !== null && typeof speaker === "object" && !speaker.provider) {
                speaker.provider = provider;
            }
        }
    }
    delete speechParams.provider;
    return script;
};
17
/**
 * Ordered migration steps. Each step applies only when the script's
 * `$mulmocast.version` equals `from`, and stamps the script with `to` afterwards.
 */
const validators = [{ from: "1.0", to: "1.1", validator: validate_1_0 }];
export const MulmoScriptMethods = {
    /**
     * Runs every applicable migration step on a raw script, then parses the
     * result with `mulmoScriptSchema`.
     *
     * Migration steps modify the given object in place.
     *
     * @param script - Raw script of any shape (for example, request-supplied JSON).
     * @returns Whatever `mulmoScriptSchema.parse` returns for the migrated script.
     * @throws Whatever `mulmoScriptSchema.parse` throws for input it does not accept.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    validate(script) {
        const migrated = validators.reduce((acc, step) => {
            // Optional chaining: null input or a missing `$mulmocast` skips migration
            // and is left to the schema parse below, instead of a TypeError here.
            if (acc?.$mulmocast?.version !== step.from) {
                return acc;
            }
            const upgraded = step.validator(acc);
            upgraded.$mulmocast.version = step.to;
            return upgraded;
        }, script);
        return mulmoScriptSchema.parse(migrated);
    },
};
@@ -58,8 +58,17 @@ export type ReplicateMovieAgentParams = {
58
58
  };
59
59
  duration?: number;
60
60
  };
61
+ export type ReplicateSoundEffectAgentParams = {
62
+ model: `${string}/${string}` | undefined;
63
+ duration?: number;
64
+ };
65
+ export type SoundEffectAgentInputs = AgentPromptInputs & {
66
+ soundEffectFile: string;
67
+ movieFile: string;
68
+ };
61
69
  export type GoogleMovieAgentConfig = GoogleImageAgentConfig;
62
70
  export type ReplicateMovieAgentConfig = AgentConfig;
71
+ export type ReplicateSoundEffectAgentConfig = AgentConfig;
63
72
  export type TTSAgentParams = {
64
73
  suppressError: boolean;
65
74
  voice: string;