mulmocast 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ {
2
+ "title": "Presentation with Ani",
3
+ "description": "Template for presentation with Ani.",
4
+ "systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate a comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of the beats, if it exists. Use the JSON below as a template. Create an appropriate number of beats, and make sure the beats are coherent and flow well.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "movieParams": {
11
+ "provider": "replicate",
12
+ "model": "bytedance/seedance-1-lite"
13
+ },
14
+ "speechParams": {
15
+ "provider": "openai",
16
+ "speakers": {
17
+ "Presenter": {
18
+ "voiceId": "shimmer",
19
+ "speechOptions": { "instruction": "Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl." }
20
+ }
21
+ }
22
+ },
23
+ "audioParams": {
24
+ "bgm": {
25
+ "kind": "url",
26
+ "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
27
+ }
28
+ },
29
+ "lang": "en",
30
+ "canvasSize": {
31
+ "width": 1024,
32
+ "height": 1536
33
+ },
34
+ "imageParams": {
35
+ "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
36
+ "images": {
37
+ "ani": {
38
+ "type": "image",
39
+ "source": {
40
+ "kind": "url",
41
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
42
+ }
43
+ }
44
+ }
45
+ }
46
+ },
47
+ "scriptName": "image_prompts_template.json"
48
+ }
@@ -0,0 +1,45 @@
1
+ {
2
+ "title": "Presentation with Ani in Japanese",
3
+ "description": "Template for presentation with Ani in Japanese.",
4
+ "systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate a comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of the beats, if it exists. Use the JSON below as a template. Create an appropriate number of beats, and make sure the beats are coherent and flow well.",
5
+ "presentationStyle": {
6
+ "$mulmocast": {
7
+ "version": "1.0",
8
+ "credit": "closing"
9
+ },
10
+ "movieParams": {
11
+ "provider": "replicate",
12
+ "model": "bytedance/seedance-1-lite"
13
+ },
14
+ "audioParams": {
15
+ "bgm": {
16
+ "kind": "url",
17
+ "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
18
+ }
19
+ },
20
+ "lang": "ja",
21
+ "canvasSize": {
22
+ "width": 1024,
23
+ "height": 1536
24
+ },
25
+ "speechParams": {
26
+ "provider": "nijivoice",
27
+ "speakers": {
28
+ "Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
29
+ }
30
+ },
31
+ "imageParams": {
32
+ "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
33
+ "images": {
34
+ "ani": {
35
+ "type": "image",
36
+ "source": {
37
+ "kind": "url",
38
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
39
+ }
40
+ }
41
+ }
42
+ }
43
+ },
44
+ "scriptName": "image_prompts_template.json"
45
+ }
@@ -143,6 +143,7 @@ const graph_data = {
143
143
  },
144
144
  addBGM: {
145
145
  agent: "addBGMAgent",
146
+ unless: ":context.presentationStyle.audioParams.bgmVolume.equal(0)",
146
147
  inputs: {
147
148
  wait: ":combineFiles",
148
149
  voiceFile: ":audioCombinedFilePath",
@@ -153,6 +154,7 @@ const graph_data = {
153
154
  },
154
155
  },
155
156
  isResult: true,
157
+ defaultValue: {},
156
158
  },
157
159
  title: {
158
160
  agent: "copyAgent",
@@ -44,17 +44,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
44
44
  } | {
45
45
  imagePath: string;
46
46
  imageFromMovie: boolean;
47
- movieParams: {
48
- speed?: number | undefined;
49
- model?: string | undefined;
50
- fillOption?: {
51
- style: "aspectFit" | "aspectFill";
52
- } | undefined;
53
- provider?: string | undefined;
54
- transition?: {
55
- type: "fade" | "slideout_left";
56
- duration: number;
57
- } | undefined;
47
+ movieAgentInfo: {
48
+ agent: string;
49
+ movieParams: {
50
+ speed?: number | undefined;
51
+ provider?: string | undefined;
52
+ model?: string | undefined;
53
+ fillOption?: {
54
+ style: "aspectFit" | "aspectFill";
55
+ } | undefined;
56
+ transition?: {
57
+ type: "fade" | "slideout_left";
58
+ duration: number;
59
+ } | undefined;
60
+ };
58
61
  };
59
62
  imageParams: {
60
63
  provider: string;
@@ -91,17 +94,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
91
94
  imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
92
95
  prompt: string;
93
96
  referenceImages: string[];
94
- movieParams: {
95
- speed?: number | undefined;
96
- model?: string | undefined;
97
- fillOption?: {
98
- style: "aspectFit" | "aspectFill";
99
- } | undefined;
100
- provider?: string | undefined;
101
- transition?: {
102
- type: "fade" | "slideout_left";
103
- duration: number;
104
- } | undefined;
97
+ movieAgentInfo: {
98
+ agent: string;
99
+ movieParams: {
100
+ speed?: number | undefined;
101
+ provider?: string | undefined;
102
+ model?: string | undefined;
103
+ fillOption?: {
104
+ style: "aspectFit" | "aspectFill";
105
+ } | undefined;
106
+ transition?: {
107
+ type: "fade" | "slideout_left";
108
+ duration: number;
109
+ } | undefined;
110
+ };
105
111
  };
106
112
  imageParams: {
107
113
  provider: string;
@@ -28,15 +28,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
28
28
  // undefined prompt indicates that image generation is not needed
29
29
  return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
30
30
  }
31
- const movieParams = { ...context.presentationStyle.movieParams, ...beat.movieParams };
32
- GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
31
+ const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
32
+ GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
33
33
  if (beat.moviePrompt && !beat.imagePrompt) {
34
- return { ...returnValue, imagePath, imageFromMovie: true, movieParams }; // no image prompt, only movie prompt
34
+ return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
35
35
  }
36
36
  // referenceImages for "edit_image", openai agent.
37
37
  const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
38
38
  const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
39
- return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieParams };
39
+ return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieAgentInfo };
40
40
  };
41
41
  export const imagePluginAgent = async (namedInputs) => {
42
42
  const { context, beat, index } = namedInputs;
@@ -39,7 +39,6 @@ const beat_graph_data = {
39
39
  nodes: {
40
40
  context: {},
41
41
  htmlImageAgentInfo: {},
42
- movieAgentInfo: {},
43
42
  imageRefs: {},
44
43
  beat: {},
45
44
  __mapIndex: {},
@@ -134,7 +133,7 @@ const beat_graph_data = {
134
133
  },
135
134
  movieGenerator: {
136
135
  if: ":preprocessor.movieFile",
137
- agent: ":movieAgentInfo.agent",
136
+ agent: ":preprocessor.movieAgentInfo.agent",
138
137
  inputs: {
139
138
  onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
140
139
  prompt: ":beat.moviePrompt",
@@ -147,7 +146,7 @@ const beat_graph_data = {
147
146
  mulmoContext: ":context",
148
147
  },
149
148
  params: {
150
- model: ":preprocessor.movieParams.model",
149
+ model: ":preprocessor.movieAgentInfo.movieParams.model",
151
150
  duration: ":beat.duration",
152
151
  canvasSize: ":context.presentationStyle.canvasSize",
153
152
  },
@@ -167,16 +166,19 @@ const beat_graph_data = {
167
166
  defaultValue: {},
168
167
  },
169
168
  audioChecker: {
170
- if: ":preprocessor.movieFile",
171
169
  agent: async (namedInputs) => {
172
- const { hasAudio } = await ffmpegGetMediaDuration(namedInputs.movieFile);
170
+ const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
171
+ if (!sourceFile) {
172
+ return { hasMovieAudio: false };
173
+ }
174
+ const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
173
175
  return { hasMovieAudio: hasAudio };
174
176
  },
175
177
  inputs: {
176
- onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
178
+ onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
177
179
  movieFile: ":preprocessor.movieFile",
180
+ imageFile: ":preprocessor.imagePath",
178
181
  },
179
- defaultValue: {},
180
182
  },
181
183
  output: {
182
184
  agent: "copyAgent",
@@ -201,7 +203,6 @@ const graph_data = {
201
203
  nodes: {
202
204
  context: {},
203
205
  htmlImageAgentInfo: {},
204
- movieAgentInfo: {},
205
206
  outputStudioFilePath: {},
206
207
  imageRefs: {},
207
208
  map: {
@@ -210,7 +211,6 @@ const graph_data = {
210
211
  rows: ":context.studio.script.beats",
211
212
  context: ":context",
212
213
  htmlImageAgentInfo: ":htmlImageAgentInfo",
213
- movieAgentInfo: ":movieAgentInfo",
214
214
  imageRefs: ":imageRefs",
215
215
  },
216
216
  isResult: true,
@@ -314,9 +314,6 @@ const prepareGenerateImages = async (context) => {
314
314
  const injections = {
315
315
  context,
316
316
  htmlImageAgentInfo,
317
- movieAgentInfo: {
318
- agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
319
- },
320
317
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
321
318
  imageRefs,
322
319
  };
@@ -1,6 +1,7 @@
1
1
  import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
+ import { provider2MovieAgent } from "../utils/provider2agent.js";
4
5
  async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
5
6
  const replicate = new Replicate({
6
7
  auth: apiKey,
@@ -21,7 +22,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
21
22
  if (imagePath) {
22
23
  const buffer = readFileSync(imagePath);
23
24
  const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
24
- if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
25
+ if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro" || model === "minimax/hailuo-02") {
25
26
  input.start_image = base64Image;
26
27
  }
27
28
  else {
@@ -29,7 +30,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
29
30
  }
30
31
  }
31
32
  try {
32
- const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
33
+ const output = await replicate.run(model ?? provider2MovieAgent.replicate.defaultModel, { input });
33
34
  // Download the generated video
34
35
  if (output && typeof output === "object" && "url" in output) {
35
36
  const videoUrl = output.url();
@@ -1,4 +1,4 @@
1
- import { llm } from "../../../../utils/utils.js";
1
+ import { llm } from "../../../../utils/provider2agent.js";
2
2
  import { getAvailableTemplates } from "../../../../utils/file.js";
3
3
  const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
4
4
  export const builder = (yargs) => {
@@ -1,5 +1,5 @@
1
1
  import { ToolCliArgs } from "../../../../types/cli_types.js";
2
- import { LLM } from "../../../../utils/utils.js";
2
+ import type { LLM } from "../../../../utils/provider2agent.js";
3
3
  export declare const handler: (argv: ToolCliArgs<{
4
4
  o?: string;
5
5
  b?: string;
@@ -1,5 +1,5 @@
1
1
  import { getAvailableTemplates } from "../../../../utils/file.js";
2
- import { llm } from "../../../../utils/utils.js";
2
+ import { llm } from "../../../../utils/provider2agent.js";
3
3
  import { storyToScriptGenerateMode } from "../../../../utils/const.js";
4
4
  const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
5
5
  export const builder = (yargs) => {
@@ -1,5 +1,5 @@
1
1
  import { ToolCliArgs } from "../../../../types/cli_types.js";
2
- import { LLM } from "../../../../utils/utils.js";
2
+ import type { LLM } from "../../../../utils/provider2agent.js";
3
3
  export declare const handler: (argv: ToolCliArgs<{
4
4
  o?: string;
5
5
  b?: string;
@@ -12,7 +12,21 @@ export declare const MulmoPresentationStyleMethods: {
12
12
  getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
13
13
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
14
14
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
15
- getMovieAgent(presentationStyle: MulmoPresentationStyle): string;
15
+ getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
16
+ agent: string;
17
+ movieParams: {
18
+ speed?: number | undefined;
19
+ provider?: string | undefined;
20
+ model?: string | undefined;
21
+ fillOption?: {
22
+ style: "aspectFit" | "aspectFill";
23
+ } | undefined;
24
+ transition?: {
25
+ type: "fade" | "slideout_left";
26
+ duration: number;
27
+ } | undefined;
28
+ };
29
+ };
16
30
  getConcurrency(presentationStyle: MulmoPresentationStyle): 4 | 16;
17
31
  getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
18
32
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
@@ -1,7 +1,7 @@
1
1
  import "dotenv/config";
2
2
  import { userAssert } from "../utils/utils.js";
3
- import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
4
- import { defaultProviders, provider2ImageAgent, provider2MovieAgent, provider2LLMAgent } from "../utils/provider2agent.js";
3
+ import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
4
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent } from "../utils/provider2agent.js";
5
5
  const defaultTextSlideStyles = [
6
6
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
7
7
  "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
@@ -80,17 +80,16 @@ export const MulmoPresentationStyleMethods = {
80
80
  imageParams: { ...defaultImageParams, ...imageParams },
81
81
  };
82
82
  },
83
- // Determine movie agent based on provider
84
- getMovieAgent(presentationStyle) {
85
- const movieProvider = (presentationStyle.movieParams?.provider ?? defaultProviders.text2movie);
86
- return provider2MovieAgent[movieProvider].agentName;
83
+ getMovieAgentInfo(presentationStyle, beat) {
84
+ const movieParams = { ...presentationStyle.movieParams, ...beat?.movieParams };
85
+ const movieProvider = text2MovieProviderSchema.parse(movieParams?.provider);
86
+ const agentInfo = provider2MovieAgent[movieProvider];
87
+ return {
88
+ agent: agentInfo.agentName,
89
+ movieParams,
90
+ };
87
91
  },
88
92
  getConcurrency(presentationStyle) {
89
- /*
90
- if (presentationStyle.movieParams?.provider === "replicate") {
91
- return 4;
92
- }
93
- */
94
93
  const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
95
94
  if (imageAgentInfo.imageParams.provider === "openai") {
96
95
  // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
@@ -1,5 +1,5 @@
1
1
  import { MulmoStoryboard, StoryToScriptGenerateMode } from "../types/index.js";
2
- import { LLM } from "../utils/utils.js";
2
+ import type { LLM } from "../utils/provider2agent.js";
3
3
  export declare const storyToScript: ({ story, beatsPerScene, templateName, outdir, fileName, llm, llmModel, generateMode, }: {
4
4
  story: MulmoStoryboard;
5
5
  beatsPerScene: number;