mulmocast 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/README.md +27 -9
  2. package/assets/font/NotoSansJP-Regular.ttf +0 -0
  3. package/assets/html/chart.html +1 -10
  4. package/assets/html/mermaid.html +1 -13
  5. package/assets/templates/business.json +16 -27
  6. package/assets/templates/coding.json +58 -21
  7. package/lib/actions/audio.d.ts +1 -1
  8. package/lib/actions/audio.js +43 -27
  9. package/lib/actions/images.js +20 -26
  10. package/lib/actions/index.d.ts +5 -0
  11. package/lib/actions/index.js +5 -0
  12. package/lib/actions/movie.d.ts +9 -1
  13. package/lib/actions/movie.js +97 -38
  14. package/lib/actions/pdf.d.ts +2 -0
  15. package/lib/actions/pdf.js +211 -0
  16. package/lib/actions/pdf2.d.ts +2 -0
  17. package/lib/actions/pdf2.js +203 -0
  18. package/lib/actions/translate.js +22 -9
  19. package/lib/agents/anthropic_agent.d.ts +23 -0
  20. package/lib/agents/anthropic_agent.js +162 -0
  21. package/lib/agents/combine_audio_files_agent.js +13 -22
  22. package/lib/agents/nested_agent.d.ts +9 -0
  23. package/lib/agents/nested_agent.js +138 -0
  24. package/lib/cli/args.d.ts +3 -1
  25. package/lib/cli/args.js +49 -34
  26. package/lib/cli/cli.d.ts +14 -0
  27. package/lib/cli/cli.js +48 -46
  28. package/lib/cli/tool-args.d.ts +2 -0
  29. package/lib/cli/tool-args.js +12 -2
  30. package/lib/cli/tool-cli.js +6 -4
  31. package/lib/methods/index.d.ts +1 -0
  32. package/lib/methods/index.js +1 -0
  33. package/lib/methods/mulmo_media_source.d.ts +4 -0
  34. package/lib/methods/mulmo_media_source.js +21 -0
  35. package/lib/methods/mulmo_script.d.ts +2 -6
  36. package/lib/methods/mulmo_script.js +12 -5
  37. package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
  38. package/lib/tools/create_mulmo_script_interactively.js +61 -20
  39. package/lib/types/index.d.ts +1 -0
  40. package/lib/types/index.js +1 -0
  41. package/lib/types/schema.d.ts +3626 -3162
  42. package/lib/types/schema.js +75 -41
  43. package/lib/types/type.d.ts +28 -1
  44. package/lib/utils/const.d.ts +2 -0
  45. package/lib/utils/const.js +2 -0
  46. package/lib/utils/file.d.ts +4 -1
  47. package/lib/utils/file.js +15 -1
  48. package/lib/utils/filters.js +1 -1
  49. package/lib/utils/image_plugins/chart.d.ts +3 -0
  50. package/lib/utils/image_plugins/chart.js +18 -0
  51. package/lib/utils/image_plugins/image.d.ts +2 -0
  52. package/lib/utils/image_plugins/image.js +3 -0
  53. package/lib/utils/image_plugins/index.d.ts +7 -0
  54. package/lib/utils/image_plugins/index.js +7 -0
  55. package/lib/utils/image_plugins/markdown.d.ts +3 -0
  56. package/lib/utils/image_plugins/markdown.js +11 -0
  57. package/lib/utils/image_plugins/mermaid.d.ts +3 -0
  58. package/lib/utils/image_plugins/mermaid.js +21 -0
  59. package/lib/utils/image_plugins/movie.d.ts +2 -0
  60. package/lib/utils/image_plugins/movie.js +3 -0
  61. package/lib/utils/image_plugins/source.d.ts +4 -0
  62. package/lib/utils/image_plugins/source.js +15 -0
  63. package/lib/utils/image_plugins/text_slide.d.ts +3 -0
  64. package/lib/utils/image_plugins/text_slide.js +12 -0
  65. package/lib/utils/image_plugins/type_guards.d.ts +6 -0
  66. package/lib/utils/image_plugins/type_guards.js +21 -0
  67. package/lib/utils/markdown.js +4 -1
  68. package/lib/utils/pdf.d.ts +8 -0
  69. package/lib/utils/pdf.js +75 -0
  70. package/lib/utils/preprocess.d.ts +58 -128
  71. package/lib/utils/preprocess.js +37 -37
  72. package/lib/utils/utils.d.ts +12 -0
  73. package/lib/utils/utils.js +34 -0
  74. package/package.json +13 -4
package/README.md CHANGED
@@ -1,6 +1,14 @@
1
1
  # mulmocast
2
2
 
3
- A CLI tool for generating podcast and video content from script files. Automates the process of creating audio, images, and video from structured script files.
3
+ A CLI tool for generating podcast and video content from script files (MulmoScript). Automates the process of creating audio, images, and video from structured MulmoScript files.
4
+
5
+ ## What is MulmoScript?
6
+
7
+ **MulmoScript** is a simple JSON/YAML format for describing multi-modal content.
8
+ You can define speakers, text, images, and layout — all in one script.
9
+
10
+ A Hello World script is available in [./docs/scripts](./docs/scripts).
11
+ See [MulmoScript Format](#mulmoscript-format) for details on the structure.
4
12
 
5
13
  ## Installation
6
14
 
@@ -22,11 +30,11 @@ brew install ffmpeg
22
30
  Create a `.env` file in your project directory with the following API keys:
23
31
 
24
32
  ### Required
25
- ```
33
+ ```bash
26
34
  OPENAI_API_KEY=your_openai_api_key
27
35
  ```
28
36
  ### Optional
29
- ```
37
+ ```bash
30
38
  DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
31
39
  GOOGLE_PROJECT_ID=your_google_project_id # for Google's image generation model
32
40
  NIJIVOICE_API_KEY=your_nijivoice_api_key # for Nijivoice's TTS model
@@ -42,13 +50,13 @@ BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo-tool
42
50
 
43
51
  ## Quick Start
44
52
 
45
- ```
53
+ ```bash
46
54
  # Generate script with interactive mode
47
- mulmo-tool scripting -i -t children_book -o ./ -f story
55
+ mulmo-tool scripting -i -t children_book -o ./ -s story
48
56
  ```
49
57
  After running this command, you'll create a story script through an interactive conversation with the AI.
50
58
 
51
- ```
59
+ ```bash
52
60
  # Generate both audio and images, then combine into video
53
61
  mulmo movie {generated_script_file}
54
62
  ```
@@ -57,7 +65,7 @@ Replace `{generated_script_file}` with the output file from the previous command
57
65
  ## Generate MulmoScript
58
66
 
59
67
  ```bash
60
- # Generate script from web content
68
+ # Generate script from web content (requires Browserless API KEY)
61
69
  mulmo-tool scripting -u https://example.com
62
70
 
63
71
  # Generate script with interactive mode
@@ -113,8 +121,13 @@ https://github.com/receptron/mulmocast-cli/tree/main/scripts
113
121
 
114
122
  CLI Usage
115
123
 
124
+ ### `mulmo -h` Output
125
+ Use this command to generate content (audio, images, movie) from an existing MulmoScript.
126
+
127
+ ```bash
128
+ mulmo -h
116
129
  ```
117
- $ mulmo -h
130
+ ```bash
118
131
  mulmo <action> <file>
119
132
 
120
133
  Run mulmocast
@@ -136,9 +149,14 @@ Options:
136
149
 
137
150
  ```
138
151
 
152
+ ### `mulmo-tool -h` Output
153
+ Use this command to generate a new MulmoScript from a URL or interactively.
154
+
155
+ ```bash
156
+ mulmo-tool -h
139
157
  ```
140
- $ mulmo-tool -h
141
158
 
159
+ ```bash
142
160
  mulmo-tool <action>
143
161
 
144
162
  Run mulmocast tool
package/assets/html/chart.html CHANGED
@@ -5,20 +5,11 @@
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
6
  <title>Simple Chart.js Bar Chart</title>
7
7
  <style>
8
- body {
9
- font-family: Arial, sans-serif;
10
- padding: 40px; padding-top: 60px;
11
- }
8
+ ${style}
12
9
  .chart-container {
13
10
  width: ${width}px;
14
11
  margin: 0 auto;
15
12
  }
16
- h1 {
17
- text-align: center;
18
- font-size: 50px;
19
- color: #333;
20
- margin-bottom: 80px;
21
- }
22
13
  </style>
23
14
  <!-- Include Chart.js from CDN -->
24
15
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
package/assets/html/mermaid.html CHANGED
@@ -5,24 +5,12 @@
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
6
  <title>Mermaid Diagram</title>
7
7
  <style>
8
- body {
9
- box-sizing: border-box;
10
- font-family: Arial, sans-serif;
11
- padding: 40px;
12
- padding-top: 60px;
13
- height: 100vh;
14
- }
8
+ ${style}
15
9
  .container {
16
10
  height: 100%;
17
11
  display: flex;
18
12
  flex-direction: column;
19
13
  }
20
- h1 {
21
- text-align: center;
22
- font-size: 50px;
23
- color: #333;
24
- margin-bottom: 30px;
25
- }
26
14
  .mermaid {
27
15
  width: 100%;
28
16
  height: 100%;
package/assets/templates/business.json CHANGED
@@ -1,20 +1,13 @@
1
1
  {
2
2
  "title": "Business presentation",
3
3
  "description": "Template for business presentation.",
4
- "systemPrompt": "Generate a script for a business presentation of the given topic. Use textSlides or markdown to show slides or use imagePrompt to generate an appropriate image for that page. Use the JSON below as a template. chartData is the data for Chart.js",
4
+ "systemPrompt": "Generate a script for a business presentation of the given topic. Use textSlides, markdown, mermaid, or chart to show slides. Extract image links in the article to reuse them in the presentation. Use the JSON below as a template. chartData is the data for Chart.js",
5
5
  "script": {
6
6
  "$mulmocast": {
7
7
  "version": "1.0",
8
8
  "credit": "closing"
9
9
  },
10
10
  "title": "Sample Title",
11
- "canvasSize": {
12
- "width": 1536,
13
- "height": 1024
14
- },
15
- "imageParams": {
16
- "style": "Style appropriate for business environment."
17
- },
18
11
  "speechParams": {
19
12
  "speakers": {
20
13
  "Presenter": {
@@ -26,19 +19,6 @@
26
19
  }
27
20
  }
28
21
  },
29
- "textSlideParams": {
30
- "cssStyles": [
31
- "body { padding: 40px; padding-top: 60px; font-family: Arial, sans-serif;color:#333; font-size: 36px }",
32
- "h1 { font-size: 50px; text-align: center }",
33
- "ul { margin-left: 36px } ",
34
- "pre { background: #eeeecc; font-size: 24px; padding:10px }",
35
- "p { margin-left: 36px }",
36
- "table { font-size: 36px; margin: auto; border: 1px solid gray; border-collapse: collapse }",
37
- "th { border-bottom: 1px solid gray }",
38
- "td, th { padding: 8px }",
39
- "tr:nth-child(even) { background-color: #eee }"
40
- ]
41
- },
42
22
  "lang": "en",
43
23
  "beats": [
44
24
  {
@@ -90,11 +70,6 @@
90
70
  ]
91
71
  }
92
72
  },
93
- {
94
- "speaker": "Presenter",
95
- "text": "Our story begins with tree-dwelling primates—small mammals with forward-facing eyes and grasping hands. These ancestors lived in forests and evolved traits useful for climbing and visual depth perception.",
96
- "imagePrompt": "tree-dwelling primates with forward-facing eyes in a jungle."
97
- },
98
73
  {
99
74
  "speaker": "Presenter",
100
75
  "text": "This page shows the sales and profits of this company from January 2024 to June 2024.",
@@ -135,7 +110,21 @@
135
110
  "image": {
136
111
  "type": "mermaid",
137
112
  "title": "Business Process Flow",
138
- "code": "graph LR\n A[Market Research] --> B[Product Planning]\n B --> C[Development]\n C --> D[Testing]\n D --> E[Manufacturing]\n E --> F[Marketing]\n F --> G[Sales]\n G --> H[Customer Support]\n H --> A"
113
+ "code": {
114
+ "kind": "text",
115
+ "text": "graph LR\n A[Market Research] --> B[Product Planning]\n B --> C[Development]\n C --> D[Testing]\n D --> E[Manufacturing]\n E --> F[Marketing]\n F --> G[Sales]\n G --> H[Customer Support]\n H --> A"
116
+ }
117
+ }
118
+ },
119
+ {
120
+ "speaker": "Presenter",
121
+ "text": "This is the image of a pingpong ball.",
122
+ "image": {
123
+ "type": "image",
124
+ "source": {
125
+ "kind": "url",
126
+ "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.png"
127
+ }
139
128
  }
140
129
  }
141
130
  ]
package/assets/templates/coding.json CHANGED
@@ -1,20 +1,13 @@
1
1
  {
2
2
  "title": "Coding presentation",
3
3
  "description": "Template for software and coding presentation.",
4
- "systemPrompt": "Generate a script for a technical presentation of the given topic. Use markdown with a code block to show some code on a slide. Use the JSON below as a template.",
4
+ "systemPrompt": "Generate a script for a technical presentation of the given topic. Use markdown with a code block to show some code on a slide. Avoid long coding examples, which may not fit in a single slide. Use the JSON below as a template.",
5
5
  "script": {
6
6
  "$mulmocast": {
7
7
  "version": "1.0",
8
8
  "credit": "closing"
9
9
  },
10
10
  "title": "Sample Title",
11
- "canvasSize": {
12
- "width": 1536,
13
- "height": 1024
14
- },
15
- "imageParams": {
16
- "style": "Style appropriate for software presentation."
17
- },
18
11
  "speechParams": {
19
12
  "speakers": {
20
13
  "Presenter": {
@@ -27,19 +20,6 @@
27
20
  }
28
21
  },
29
22
  "lang": "en",
30
- "textSlideParams": {
31
- "cssStyles": [
32
- "body { margin: 40px; margin-top: 60px; color:#333; font-size: 48px }",
33
- "h1 { font-size: 60px; text-align: center }",
34
- "ul { margin-left: 40px } ",
35
- "pre { background: #eeeecc; font-size: 24px; padding:10px }",
36
- "p { margin-left: 40px }",
37
- "table { font-size: 40px; margin: auto; border: 1px solid gray; border-collapse: collapse }",
38
- "th { border-bottom: 1px solid gray }",
39
- "td, th { padding: 8px }",
40
- "tr:nth-child(even) { background-color: #eee }"
41
- ]
42
- },
43
23
  "beats": [
44
24
  {
45
25
  "speaker": "Presenter",
@@ -97,6 +77,63 @@
97
77
  "| Codecademy Hoodie | False | 42.99 |"
98
78
  ]
99
79
  }
80
+ },
81
+ {
82
+ "speaker": "Presenter",
83
+ "text": "Next, let's look at a diagram of our business process flow. This illustrates the key steps from product development to sales.",
84
+ "image": {
85
+ "type": "mermaid",
86
+ "title": "Business Process Flow",
87
+ "code": {
88
+ "kind": "text",
89
+ "text": "graph LR\n A[Market Research] --> B[Product Planning]\n B --> C[Development]\n C --> D[Testing]\n D --> E[Manufacturing]\n E --> F[Marketing]\n F --> G[Sales]\n G --> H[Customer Support]\n H --> A"
90
+ }
91
+ }
92
+ },
93
+ {
94
+ "speaker": "Presenter",
95
+ "text": "This page shows the sales and profits of this company from January 2024 to June 2024.",
96
+ "image": {
97
+ "type": "chart",
98
+ "title": "Sales and Profits (from Jan to June)",
99
+ "chartData": {
100
+ "type": "bar",
101
+ "data": {
102
+ "labels": ["January", "February", "March", "April", "May", "June"],
103
+ "datasets": [
104
+ {
105
+ "label": "Revenue ($1000s)",
106
+ "data": [120, 135, 180, 155, 170, 190],
107
+ "backgroundColor": "rgba(54, 162, 235, 0.5)",
108
+ "borderColor": "rgba(54, 162, 235, 1)",
109
+ "borderWidth": 1
110
+ },
111
+ {
112
+ "label": "Profit ($1000s)",
113
+ "data": [45, 52, 68, 53, 61, 73],
114
+ "backgroundColor": "rgba(75, 192, 192, 0.5)",
115
+ "borderColor": "rgba(75, 192, 192, 1)",
116
+ "borderWidth": 1
117
+ }
118
+ ]
119
+ },
120
+ "options": {
121
+ "responsive": true,
122
+ "animation": false
123
+ }
124
+ }
125
+ }
126
+ },
127
+ {
128
+ "speaker": "Presenter",
129
+ "text": "This is the image of a pingpong ball.",
130
+ "image": {
131
+ "type": "image",
132
+ "source": {
133
+ "kind": "url",
134
+ "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.png"
135
+ }
136
+ }
100
137
  }
101
138
  ]
102
139
  }
package/lib/actions/audio.d.ts CHANGED
@@ -1,3 +1,3 @@
1
1
  import "dotenv/config";
2
2
  import { MulmoStudioContext } from "../types/index.js";
3
- export declare const audio: (context: MulmoStudioContext, concurrency: number) => Promise<void>;
3
+ export declare const audio: (context: MulmoStudioContext) => Promise<void>;
package/lib/actions/audio.js CHANGED
@@ -8,43 +8,59 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
8
8
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
9
9
  import { MulmoScriptMethods } from "../methods/index.js";
10
10
  import { fileCacheAgentFilter } from "../utils/filters.js";
11
- import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, } from "../utils/file.js";
11
+ import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
12
+ import { text2hash } from "../utils/utils.js";
12
13
  const { default: __, ...vanillaAgents } = agents;
13
14
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
14
15
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
16
+ const provider_to_agent = {
17
+ nijivoice: "ttsNijivoiceAgent",
18
+ openai: "ttsOpenaiAgent",
19
+ };
20
+ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
21
+ if (beat.audio?.type === "audio") {
22
+ const path = resolveMediaSource(beat.audio.source, context);
23
+ if (path) {
24
+ return path;
25
+ }
26
+ throw new Error("Invalid audio source");
27
+ }
28
+ return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
29
+ };
30
+ const preprocessor = (namedInputs) => {
31
+ const { beat, index, context, audioDirPath } = namedInputs;
32
+ const studioBeat = context.studio.beats[index];
33
+ const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
34
+ const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
35
+ const hash_string = `${beat.text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
36
+ const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}`;
37
+ const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
38
+ studioBeat.audioFile = audioPath;
39
+ return {
40
+ ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
41
+ studioBeat,
42
+ voiceId,
43
+ speechOptions,
44
+ audioPath,
45
+ };
46
+ };
15
47
  const graph_tts = {
16
48
  nodes: {
17
49
  preprocessor: {
18
- agent: (namedInputs) => {
19
- const { beat, script, speakers } = namedInputs;
20
- return {
21
- voiceId: speakers[beat.speaker].voiceId,
22
- speechOptions: MulmoScriptMethods.getSpeechOptions(script, beat),
23
- };
24
- },
50
+ agent: preprocessor,
25
51
  inputs: {
26
52
  beat: ":beat",
27
- script: ":script",
28
- speakers: ":script.speechParams.speakers",
29
- },
30
- },
31
- ttsAgent: {
32
- agent: (namedInputs) => {
33
- if (namedInputs.provider === "nijivoice") {
34
- return "ttsNijivoiceAgent";
35
- }
36
- return "ttsOpenaiAgent";
37
- },
38
- inputs: {
39
- provider: ":script.speechParams.provider",
53
+ index: ":__mapIndex",
54
+ context: ":context",
55
+ audioDirPath: ":audioDirPath",
40
56
  },
41
57
  },
42
58
  tts: {
43
59
  unless: ":beat.audio",
44
- agent: ":ttsAgent",
60
+ agent: ":preprocessor.ttsAgent",
45
61
  inputs: {
46
62
  text: ":beat.text",
47
- file: "${:audioSegmentDirPath}/${:beat.audioFile}.mp3", // TODO
63
+ file: ":preprocessor.audioPath",
48
64
  force: ":context.force",
49
65
  },
50
66
  params: {
@@ -68,8 +84,8 @@ const graph_data = {
68
84
  map: {
69
85
  agent: "mapAgent",
70
86
  inputs: {
71
- rows: ":context.studio.beats",
72
- script: ":context.studio.script",
87
+ rows: ":context.studio.script.beats",
88
+ studio: ":context.studio",
73
89
  audioDirPath: ":audioDirPath",
74
90
  audioSegmentDirPath: ":audioSegmentDirPath",
75
91
  context: ":context",
@@ -128,7 +144,7 @@ const agentFilters = [
128
144
  nodeIds: ["tts"],
129
145
  },
130
146
  ];
131
- export const audio = async (context, concurrency) => {
147
+ export const audio = async (context) => {
132
148
  const { studio, fileDirs } = context;
133
149
  const { outDirPath, audioDirPath } = fileDirs;
134
150
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
@@ -137,7 +153,7 @@ export const audio = async (context, concurrency) => {
137
153
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
138
154
  mkdir(outDirPath);
139
155
  mkdir(audioSegmentDirPath);
140
- graph_data.concurrency = concurrency;
156
+ graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
141
157
  const graph = new GraphAI(graph_data, {
142
158
  ...vanillaAgents,
143
159
  fileWriteAgent,
package/lib/actions/images.js CHANGED
@@ -7,42 +7,36 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
7
7
  import imageGoogleAgent from "../agents/image_google_agent.js";
8
8
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
9
9
  import { MulmoScriptMethods } from "../methods/index.js";
10
- import { processChart, processMarkdown, processTextSlide, processImage, processMermaid } from "../utils/image_preprocess.js";
10
+ import { imagePlugins } from "../utils/image_plugins/index.js";
11
11
  const { default: __, ...vanillaAgents } = agents;
12
12
  dotenv.config();
13
13
  // const openai = new OpenAI();
14
14
  import { GoogleAuth } from "google-auth-library";
15
+ const htmlStyle = (script, beat) => {
16
+ return {
17
+ canvasSize: MulmoScriptMethods.getCanvasSize(script),
18
+ textSlideStyle: MulmoScriptMethods.getTextSlideStyle(script, beat),
19
+ };
20
+ };
15
21
  const imagePreprocessAgent = async (namedInputs) => {
16
22
  const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
17
23
  const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
18
- const prompt = (beat.imagePrompt || beat.text) + "\n" + (imageParams.style || "");
19
24
  const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
20
- const aspectRatio = MulmoScriptMethods.getAspectRatio(context.studio.script);
21
- const textSlideStyle = MulmoScriptMethods.getTextSlideStyle(context.studio.script, beat);
25
+ const returnValue = {
26
+ aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
27
+ imageParams,
28
+ };
22
29
  if (beat.image) {
23
- const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
24
- const processorParams = { beat, context, imagePath, textSlideStyle, canvasSize };
25
- if (beat.image.type === "textSlide") {
26
- await processTextSlide(processorParams);
27
- }
28
- else if (beat.image.type === "markdown") {
29
- await processMarkdown(processorParams);
30
- }
31
- else if (beat.image.type === "image") {
32
- const path = processImage(processorParams);
33
- if (path) {
34
- // undefined prompt indicates that image generation is not needed
35
- return { path, prompt: undefined, imageParams, aspectRatio };
36
- }
37
- }
38
- else if (beat.image.type === "chart") {
39
- await processChart(processorParams);
40
- }
41
- else if (beat.image.type === "mermaid") {
42
- await processMermaid(processorParams);
30
+ const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
31
+ if (plugin) {
32
+ const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
33
+ const path = await plugin.process(processorParams);
34
+ // undefined prompt indicates that image generation is not needed
35
+ return { path, ...returnValue };
43
36
  }
44
37
  }
45
- return { path: imagePath, prompt, imageParams, aspectRatio };
38
+ const prompt = (beat.imagePrompt || beat.text) + "\n" + (imageParams.style || "");
39
+ return { path: imagePath, prompt, ...returnValue };
46
40
  };
47
41
  const graph_data = {
48
42
  version: 0.5,
@@ -54,7 +48,7 @@ const graph_data = {
54
48
  outputStudioFilePath: {},
55
49
  map: {
56
50
  agent: "mapAgent",
57
- inputs: { rows: ":context.studio.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
51
+ inputs: { rows: ":context.studio.script.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
58
52
  isResult: true,
59
53
  params: {
60
54
  rowKey: "beat",
package/lib/actions/index.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ export * from "./audio.js";
2
+ export * from "./images.js";
3
+ export * from "./movie.js";
4
+ export * from "./pdf.js";
5
+ export * from "./translate.js";
package/lib/actions/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export * from "./audio.js";
2
+ export * from "./images.js";
3
+ export * from "./movie.js";
4
+ export * from "./pdf.js";
5
+ export * from "./translate.js";
package/lib/actions/movie.d.ts CHANGED
@@ -1,2 +1,10 @@
1
- import { MulmoStudioContext } from "../types/index.js";
1
+ import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
2
+ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
3
+ videoId: string;
4
+ videoPart: string;
5
+ };
6
+ export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
7
+ audioId: string;
8
+ audioPart: string;
9
+ };
2
10
  export declare const movie: (context: MulmoStudioContext) => Promise<void>;