mulmocast 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -9
- package/assets/font/NotoSansJP-Regular.ttf +0 -0
- package/assets/html/chart.html +1 -10
- package/assets/html/mermaid.html +1 -13
- package/assets/templates/business.json +16 -27
- package/assets/templates/coding.json +58 -21
- package/lib/actions/audio.d.ts +1 -1
- package/lib/actions/audio.js +43 -27
- package/lib/actions/images.js +20 -26
- package/lib/actions/index.d.ts +5 -0
- package/lib/actions/index.js +5 -0
- package/lib/actions/movie.d.ts +9 -1
- package/lib/actions/movie.js +97 -38
- package/lib/actions/pdf.d.ts +2 -0
- package/lib/actions/pdf.js +211 -0
- package/lib/actions/translate.js +22 -9
- package/lib/agents/combine_audio_files_agent.js +13 -22
- package/lib/cli/args.d.ts +3 -1
- package/lib/cli/args.js +49 -34
- package/lib/cli/cli.d.ts +15 -0
- package/lib/cli/cli.js +44 -47
- package/lib/cli/run.d.ts +1 -0
- package/lib/cli/run.js +2 -0
- package/lib/cli/tool-args.d.ts +2 -0
- package/lib/cli/tool-args.js +12 -2
- package/lib/cli/tool-cli.js +6 -4
- package/lib/methods/index.d.ts +1 -0
- package/lib/methods/index.js +1 -0
- package/lib/methods/mulmo_media_source.d.ts +4 -0
- package/lib/methods/mulmo_media_source.js +21 -0
- package/lib/methods/mulmo_script.d.ts +2 -6
- package/lib/methods/mulmo_script.js +12 -5
- package/lib/tools/create_mulmo_script_interactively.d.ts +1 -1
- package/lib/tools/create_mulmo_script_interactively.js +61 -20
- package/lib/types/index.d.ts +1 -0
- package/lib/types/index.js +1 -0
- package/lib/types/schema.d.ts +3626 -3162
- package/lib/types/schema.js +75 -41
- package/lib/types/type.d.ts +28 -1
- package/lib/utils/const.d.ts +2 -0
- package/lib/utils/const.js +2 -0
- package/lib/utils/file.d.ts +4 -1
- package/lib/utils/file.js +15 -1
- package/lib/utils/filters.js +1 -1
- package/lib/utils/image_plugins/chart.d.ts +3 -0
- package/lib/utils/image_plugins/chart.js +18 -0
- package/lib/utils/image_plugins/image.d.ts +2 -0
- package/lib/utils/image_plugins/image.js +3 -0
- package/lib/utils/image_plugins/index.d.ts +7 -0
- package/lib/utils/image_plugins/index.js +7 -0
- package/lib/utils/image_plugins/markdown.d.ts +3 -0
- package/lib/utils/image_plugins/markdown.js +11 -0
- package/lib/utils/image_plugins/mermaid.d.ts +3 -0
- package/lib/utils/image_plugins/mermaid.js +21 -0
- package/lib/utils/image_plugins/movie.d.ts +2 -0
- package/lib/utils/image_plugins/movie.js +3 -0
- package/lib/utils/image_plugins/source.d.ts +4 -0
- package/lib/utils/image_plugins/source.js +15 -0
- package/lib/utils/image_plugins/text_slide.d.ts +3 -0
- package/lib/utils/image_plugins/text_slide.js +12 -0
- package/lib/utils/image_plugins/type_guards.d.ts +6 -0
- package/lib/utils/image_plugins/type_guards.js +21 -0
- package/lib/utils/markdown.js +4 -1
- package/lib/utils/pdf.d.ts +8 -0
- package/lib/utils/pdf.js +75 -0
- package/lib/utils/preprocess.d.ts +58 -128
- package/lib/utils/preprocess.js +37 -37
- package/lib/utils/utils.d.ts +12 -0
- package/lib/utils/utils.js +34 -0
- package/package.json +21 -12
- package/lib/tools/seed.d.ts +0 -3
- package/lib/tools/seed.js +0 -201
- package/lib/tools/seed_from_url.d.ts +0 -3
- package/lib/tools/seed_from_url.js +0 -178
- package/lib/tools/seed_from_url2.d.ts +0 -3
- package/lib/tools/seed_from_url2.js +0 -154
- package/lib/utils/image_preprocess.d.ts +0 -14
- package/lib/utils/image_preprocess.js +0 -52
- package/lib/utils/text_hash.d.ts +0 -1
- package/lib/utils/text_hash.js +0 -4
package/README.md
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
# mulmocast
|
|
2
2
|
|
|
3
|
-
A CLI tool for generating podcast and video content from script files. Automates the process of creating audio, images, and video from structured
|
|
3
|
+
A CLI tool for generating podcast and video content from script files (MulmoScript). Automates the process of creating audio, images, and video from structured MulmoScript files.
|
|
4
|
+
|
|
5
|
+
## What is MulmoScript?
|
|
6
|
+
|
|
7
|
+
**MulmoScript** is a simple JSON/YAML format for describing multi-modal content.
|
|
8
|
+
You can define speakers, text, images, and layout — all in one script.
|
|
9
|
+
|
|
10
|
+
A Hello World script is available in [./docs/scripts](./docs/scripts).
|
|
11
|
+
See [MulmoScript Format](#mulmoscript-format) for details on the structure.
|
|
4
12
|
|
|
5
13
|
## Installation
|
|
6
14
|
|
|
@@ -22,11 +30,11 @@ brew install ffmpeg
|
|
|
22
30
|
Create a `.env` file in your project directory with the following API keys:
|
|
23
31
|
|
|
24
32
|
### Required
|
|
25
|
-
```
|
|
33
|
+
```bash
|
|
26
34
|
OPENAI_API_KEY=your_openai_api_key
|
|
27
35
|
```
|
|
28
36
|
### Optional
|
|
29
|
-
```
|
|
37
|
+
```bash
|
|
30
38
|
DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
|
|
31
39
|
GOOGLE_PROJECT_ID=your_google_project_id # for Google's image generation model
|
|
32
40
|
NIJIVOICE_API_KEY=your_nijivoice_api_key # for Nijivoice's TTS model
|
|
@@ -42,13 +50,13 @@ BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo-tool
|
|
|
42
50
|
|
|
43
51
|
## Quick Start
|
|
44
52
|
|
|
45
|
-
```
|
|
53
|
+
```bash
|
|
46
54
|
# Generate script with interactive mode
|
|
47
|
-
mulmo-tool scripting -i -t children_book -o ./ -
|
|
55
|
+
mulmo-tool scripting -i -t children_book -o ./ -s story
|
|
48
56
|
```
|
|
49
57
|
After running this command, you'll create a story script through an interactive conversation with the AI.
|
|
50
58
|
|
|
51
|
-
```
|
|
59
|
+
```bash
|
|
52
60
|
# Generate both audio and images, then combine into video
|
|
53
61
|
mulmo movie {generated_script_file}
|
|
54
62
|
```
|
|
@@ -57,7 +65,7 @@ Replace `{generated_script_file}` with the output file from the previous command
|
|
|
57
65
|
## Generate MulmoScript
|
|
58
66
|
|
|
59
67
|
```bash
|
|
60
|
-
# Generate script from web content
|
|
68
|
+
# Generate script from web content (requires Browserless API KEY)
|
|
61
69
|
mulmo-tool scripting -u https://example.com
|
|
62
70
|
|
|
63
71
|
# Generate script with interactive mode
|
|
@@ -113,8 +121,13 @@ https://github.com/receptron/mulmocast-cli/tree/main/scripts
|
|
|
113
121
|
|
|
114
122
|
CLI Usage
|
|
115
123
|
|
|
124
|
+
### `mulmo -h` Output
|
|
125
|
+
Use this command to generate content (audio, images, movie) from an existing MulmoScript.
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
mulmo -h
|
|
116
129
|
```
|
|
117
|
-
|
|
130
|
+
```bash
|
|
118
131
|
mulmo <action> <file>
|
|
119
132
|
|
|
120
133
|
Run mulmocast
|
|
@@ -136,9 +149,14 @@ Options:
|
|
|
136
149
|
|
|
137
150
|
```
|
|
138
151
|
|
|
152
|
+
### `mulmo-tool -h` Output
|
|
153
|
+
Use this command to generate a new MulmoScript from a URL or interactively.
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
mulmo-tool -h
|
|
139
157
|
```
|
|
140
|
-
$ mulmo-tool -h
|
|
141
158
|
|
|
159
|
+
```bash
|
|
142
160
|
mulmo-tool <action>
|
|
143
161
|
|
|
144
162
|
Run mulmocast tool
|
|
Binary file
|
package/assets/html/chart.html
CHANGED
|
@@ -5,20 +5,11 @@
|
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
6
|
<title>Simple Chart.js Bar Chart</title>
|
|
7
7
|
<style>
|
|
8
|
-
|
|
9
|
-
font-family: Arial, sans-serif;
|
|
10
|
-
padding: 40px; padding-top: 60px;
|
|
11
|
-
}
|
|
8
|
+
${style}
|
|
12
9
|
.chart-container {
|
|
13
10
|
width: ${width}px;
|
|
14
11
|
margin: 0 auto;
|
|
15
12
|
}
|
|
16
|
-
h1 {
|
|
17
|
-
text-align: center;
|
|
18
|
-
font-size: 50px;
|
|
19
|
-
color: #333;
|
|
20
|
-
margin-bottom: 80px;
|
|
21
|
-
}
|
|
22
13
|
</style>
|
|
23
14
|
<!-- Include Chart.js from CDN -->
|
|
24
15
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
package/assets/html/mermaid.html
CHANGED
|
@@ -5,24 +5,12 @@
|
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
6
|
<title>Mermaid Diagram</title>
|
|
7
7
|
<style>
|
|
8
|
-
|
|
9
|
-
box-sizing: border-box;
|
|
10
|
-
font-family: Arial, sans-serif;
|
|
11
|
-
padding: 40px;
|
|
12
|
-
padding-top: 60px;
|
|
13
|
-
height: 100vh;
|
|
14
|
-
}
|
|
8
|
+
${style}
|
|
15
9
|
.container {
|
|
16
10
|
height: 100%;
|
|
17
11
|
display: flex;
|
|
18
12
|
flex-direction: column;
|
|
19
13
|
}
|
|
20
|
-
h1 {
|
|
21
|
-
text-align: center;
|
|
22
|
-
font-size: 50px;
|
|
23
|
-
color: #333;
|
|
24
|
-
margin-bottom: 30px;
|
|
25
|
-
}
|
|
26
14
|
.mermaid {
|
|
27
15
|
width: 100%;
|
|
28
16
|
height: 100%;
|
|
@@ -1,20 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Business presentation",
|
|
3
3
|
"description": "Template for business presentation.",
|
|
4
|
-
"systemPrompt": "Generate a script for a business presentation of the given topic. Use textSlides or
|
|
4
|
+
"systemPrompt": "Generate a script for a business presentation of the given topic. Use textSlides, markdown, mermaid, or chart to show slides. Extract image links in the article to reuse them in the presentation. Use the JSON below as a template. chartData is the data for Chart.js",
|
|
5
5
|
"script": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.0",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"title": "Sample Title",
|
|
11
|
-
"canvasSize": {
|
|
12
|
-
"width": 1536,
|
|
13
|
-
"height": 1024
|
|
14
|
-
},
|
|
15
|
-
"imageParams": {
|
|
16
|
-
"style": "Style appropriate for business environment."
|
|
17
|
-
},
|
|
18
11
|
"speechParams": {
|
|
19
12
|
"speakers": {
|
|
20
13
|
"Presenter": {
|
|
@@ -26,19 +19,6 @@
|
|
|
26
19
|
}
|
|
27
20
|
}
|
|
28
21
|
},
|
|
29
|
-
"textSlideParams": {
|
|
30
|
-
"cssStyles": [
|
|
31
|
-
"body { padding: 40px; padding-top: 60px; font-family: Arial, sans-serif;color:#333; font-size: 36px }",
|
|
32
|
-
"h1 { font-size: 50px; text-align: center }",
|
|
33
|
-
"ul { margin-left: 36px } ",
|
|
34
|
-
"pre { background: #eeeecc; font-size: 24px; padding:10px }",
|
|
35
|
-
"p { margin-left: 36px }",
|
|
36
|
-
"table { font-size: 36px; margin: auto; border: 1px solid gray; border-collapse: collapse }",
|
|
37
|
-
"th { border-bottom: 1px solid gray }",
|
|
38
|
-
"td, th { padding: 8px }",
|
|
39
|
-
"tr:nth-child(even) { background-color: #eee }"
|
|
40
|
-
]
|
|
41
|
-
},
|
|
42
22
|
"lang": "en",
|
|
43
23
|
"beats": [
|
|
44
24
|
{
|
|
@@ -90,11 +70,6 @@
|
|
|
90
70
|
]
|
|
91
71
|
}
|
|
92
72
|
},
|
|
93
|
-
{
|
|
94
|
-
"speaker": "Presenter",
|
|
95
|
-
"text": "Our story begins with tree-dwelling primates—small mammals with forward-facing eyes and grasping hands. These ancestors lived in forests and evolved traits useful for climbing and visual depth perception.",
|
|
96
|
-
"imagePrompt": "tree-dwelling primates with forward-facing eyes in a jungle."
|
|
97
|
-
},
|
|
98
73
|
{
|
|
99
74
|
"speaker": "Presenter",
|
|
100
75
|
"text": "This page shows the sales and profits of this company from January 2024 to June 2024.",
|
|
@@ -135,7 +110,21 @@
|
|
|
135
110
|
"image": {
|
|
136
111
|
"type": "mermaid",
|
|
137
112
|
"title": "Business Process Flow",
|
|
138
|
-
"code":
|
|
113
|
+
"code": {
|
|
114
|
+
"kind": "text",
|
|
115
|
+
"text": "graph LR\n A[Market Research] --> B[Product Planning]\n B --> C[Development]\n C --> D[Testing]\n D --> E[Manufacturing]\n E --> F[Marketing]\n F --> G[Sales]\n G --> H[Customer Support]\n H --> A"
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"speaker": "Presenter",
|
|
121
|
+
"text": "This is the image of a pingpong ball.",
|
|
122
|
+
"image": {
|
|
123
|
+
"type": "image",
|
|
124
|
+
"source": {
|
|
125
|
+
"kind": "url",
|
|
126
|
+
"url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.png"
|
|
127
|
+
}
|
|
139
128
|
}
|
|
140
129
|
}
|
|
141
130
|
]
|
|
@@ -1,20 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Coding presentation",
|
|
3
3
|
"description": "Template for software and coding presentation.",
|
|
4
|
-
"systemPrompt": "Generate a script for a technical presentation of the given topic. Use markdown with a code block to show some code on a slide. Use the JSON below as a template.",
|
|
4
|
+
"systemPrompt": "Generate a script for a technical presentation of the given topic. Use markdown with a code block to show some code on a slide. Avoid long coding examples, which may not fit in a single slide. Use the JSON below as a template.",
|
|
5
5
|
"script": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.0",
|
|
8
8
|
"credit": "closing"
|
|
9
9
|
},
|
|
10
10
|
"title": "Sample Title",
|
|
11
|
-
"canvasSize": {
|
|
12
|
-
"width": 1536,
|
|
13
|
-
"height": 1024
|
|
14
|
-
},
|
|
15
|
-
"imageParams": {
|
|
16
|
-
"style": "Style appropriate for software presentation."
|
|
17
|
-
},
|
|
18
11
|
"speechParams": {
|
|
19
12
|
"speakers": {
|
|
20
13
|
"Presenter": {
|
|
@@ -27,19 +20,6 @@
|
|
|
27
20
|
}
|
|
28
21
|
},
|
|
29
22
|
"lang": "en",
|
|
30
|
-
"textSlideParams": {
|
|
31
|
-
"cssStyles": [
|
|
32
|
-
"body { margin: 40px; margin-top: 60px; color:#333; font-size: 48px }",
|
|
33
|
-
"h1 { font-size: 60px; text-align: center }",
|
|
34
|
-
"ul { margin-left: 40px } ",
|
|
35
|
-
"pre { background: #eeeecc; font-size: 24px; padding:10px }",
|
|
36
|
-
"p { margin-left: 40px }",
|
|
37
|
-
"table { font-size: 40px; margin: auto; border: 1px solid gray; border-collapse: collapse }",
|
|
38
|
-
"th { border-bottom: 1px solid gray }",
|
|
39
|
-
"td, th { padding: 8px }",
|
|
40
|
-
"tr:nth-child(even) { background-color: #eee }"
|
|
41
|
-
]
|
|
42
|
-
},
|
|
43
23
|
"beats": [
|
|
44
24
|
{
|
|
45
25
|
"speaker": "Presenter",
|
|
@@ -97,6 +77,63 @@
|
|
|
97
77
|
"| Codecademy Hoodie | False | 42.99 |"
|
|
98
78
|
]
|
|
99
79
|
}
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"speaker": "Presenter",
|
|
83
|
+
"text": "Next, let's look at a diagram of our business process flow. This illustrates the key steps from product development to sales.",
|
|
84
|
+
"image": {
|
|
85
|
+
"type": "mermaid",
|
|
86
|
+
"title": "Business Process Flow",
|
|
87
|
+
"code": {
|
|
88
|
+
"kind": "text",
|
|
89
|
+
"text": "graph LR\n A[Market Research] --> B[Product Planning]\n B --> C[Development]\n C --> D[Testing]\n D --> E[Manufacturing]\n E --> F[Marketing]\n F --> G[Sales]\n G --> H[Customer Support]\n H --> A"
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"speaker": "Presenter",
|
|
95
|
+
"text": "This page shows the sales and profits of this company from January 2024 to June 2024.",
|
|
96
|
+
"image": {
|
|
97
|
+
"type": "chart",
|
|
98
|
+
"title": "Sales and Profits (from Jan to June)",
|
|
99
|
+
"chartData": {
|
|
100
|
+
"type": "bar",
|
|
101
|
+
"data": {
|
|
102
|
+
"labels": ["January", "February", "March", "April", "May", "June"],
|
|
103
|
+
"datasets": [
|
|
104
|
+
{
|
|
105
|
+
"label": "Revenue ($1000s)",
|
|
106
|
+
"data": [120, 135, 180, 155, 170, 190],
|
|
107
|
+
"backgroundColor": "rgba(54, 162, 235, 0.5)",
|
|
108
|
+
"borderColor": "rgba(54, 162, 235, 1)",
|
|
109
|
+
"borderWidth": 1
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"label": "Profit ($1000s)",
|
|
113
|
+
"data": [45, 52, 68, 53, 61, 73],
|
|
114
|
+
"backgroundColor": "rgba(75, 192, 192, 0.5)",
|
|
115
|
+
"borderColor": "rgba(75, 192, 192, 1)",
|
|
116
|
+
"borderWidth": 1
|
|
117
|
+
}
|
|
118
|
+
]
|
|
119
|
+
},
|
|
120
|
+
"options": {
|
|
121
|
+
"responsive": true,
|
|
122
|
+
"animation": false
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"speaker": "Presenter",
|
|
129
|
+
"text": "This is the image of a pingpong ball.",
|
|
130
|
+
"image": {
|
|
131
|
+
"type": "image",
|
|
132
|
+
"source": {
|
|
133
|
+
"kind": "url",
|
|
134
|
+
"url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.png"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
100
137
|
}
|
|
101
138
|
]
|
|
102
139
|
}
|
package/lib/actions/audio.d.ts
CHANGED
package/lib/actions/audio.js
CHANGED
|
@@ -8,43 +8,59 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
|
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
9
|
import { MulmoScriptMethods } from "../methods/index.js";
|
|
10
10
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
11
|
-
import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, } from "../utils/file.js";
|
|
11
|
+
import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
|
|
12
|
+
import { text2hash } from "../utils/utils.js";
|
|
12
13
|
const { default: __, ...vanillaAgents } = agents;
|
|
13
14
|
// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
|
|
14
15
|
// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
|
|
16
|
+
const provider_to_agent = {
|
|
17
|
+
nijivoice: "ttsNijivoiceAgent",
|
|
18
|
+
openai: "ttsOpenaiAgent",
|
|
19
|
+
};
|
|
20
|
+
const getAudioPath = (context, beat, audioFile, audioDirPath) => {
|
|
21
|
+
if (beat.audio?.type === "audio") {
|
|
22
|
+
const path = resolveMediaSource(beat.audio.source, context);
|
|
23
|
+
if (path) {
|
|
24
|
+
return path;
|
|
25
|
+
}
|
|
26
|
+
throw new Error("Invalid audio source");
|
|
27
|
+
}
|
|
28
|
+
return getAudioSegmentFilePath(audioDirPath, context.studio.filename, audioFile);
|
|
29
|
+
};
|
|
30
|
+
const preprocessor = (namedInputs) => {
|
|
31
|
+
const { beat, index, context, audioDirPath } = namedInputs;
|
|
32
|
+
const studioBeat = context.studio.beats[index];
|
|
33
|
+
const voiceId = context.studio.script.speechParams.speakers[beat.speaker].voiceId;
|
|
34
|
+
const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
|
|
35
|
+
const hash_string = `${beat.text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}`;
|
|
36
|
+
const audioFile = `${context.studio.filename}_${index}_${text2hash(hash_string)}`;
|
|
37
|
+
const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
|
|
38
|
+
studioBeat.audioFile = audioPath;
|
|
39
|
+
return {
|
|
40
|
+
ttsAgent: provider_to_agent[context.studio.script.speechParams.provider],
|
|
41
|
+
studioBeat,
|
|
42
|
+
voiceId,
|
|
43
|
+
speechOptions,
|
|
44
|
+
audioPath,
|
|
45
|
+
};
|
|
46
|
+
};
|
|
15
47
|
const graph_tts = {
|
|
16
48
|
nodes: {
|
|
17
49
|
preprocessor: {
|
|
18
|
-
agent:
|
|
19
|
-
const { beat, script, speakers } = namedInputs;
|
|
20
|
-
return {
|
|
21
|
-
voiceId: speakers[beat.speaker].voiceId,
|
|
22
|
-
speechOptions: MulmoScriptMethods.getSpeechOptions(script, beat),
|
|
23
|
-
};
|
|
24
|
-
},
|
|
50
|
+
agent: preprocessor,
|
|
25
51
|
inputs: {
|
|
26
52
|
beat: ":beat",
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
},
|
|
31
|
-
ttsAgent: {
|
|
32
|
-
agent: (namedInputs) => {
|
|
33
|
-
if (namedInputs.provider === "nijivoice") {
|
|
34
|
-
return "ttsNijivoiceAgent";
|
|
35
|
-
}
|
|
36
|
-
return "ttsOpenaiAgent";
|
|
37
|
-
},
|
|
38
|
-
inputs: {
|
|
39
|
-
provider: ":script.speechParams.provider",
|
|
53
|
+
index: ":__mapIndex",
|
|
54
|
+
context: ":context",
|
|
55
|
+
audioDirPath: ":audioDirPath",
|
|
40
56
|
},
|
|
41
57
|
},
|
|
42
58
|
tts: {
|
|
43
59
|
unless: ":beat.audio",
|
|
44
|
-
agent: ":ttsAgent",
|
|
60
|
+
agent: ":preprocessor.ttsAgent",
|
|
45
61
|
inputs: {
|
|
46
62
|
text: ":beat.text",
|
|
47
|
-
file: "
|
|
63
|
+
file: ":preprocessor.audioPath",
|
|
48
64
|
force: ":context.force",
|
|
49
65
|
},
|
|
50
66
|
params: {
|
|
@@ -68,8 +84,8 @@ const graph_data = {
|
|
|
68
84
|
map: {
|
|
69
85
|
agent: "mapAgent",
|
|
70
86
|
inputs: {
|
|
71
|
-
rows: ":context.studio.beats",
|
|
72
|
-
|
|
87
|
+
rows: ":context.studio.script.beats",
|
|
88
|
+
studio: ":context.studio",
|
|
73
89
|
audioDirPath: ":audioDirPath",
|
|
74
90
|
audioSegmentDirPath: ":audioSegmentDirPath",
|
|
75
91
|
context: ":context",
|
|
@@ -128,7 +144,7 @@ const agentFilters = [
|
|
|
128
144
|
nodeIds: ["tts"],
|
|
129
145
|
},
|
|
130
146
|
];
|
|
131
|
-
export const audio = async (context
|
|
147
|
+
export const audio = async (context) => {
|
|
132
148
|
const { studio, fileDirs } = context;
|
|
133
149
|
const { outDirPath, audioDirPath } = fileDirs;
|
|
134
150
|
const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
|
|
@@ -137,7 +153,7 @@ export const audio = async (context, concurrency) => {
|
|
|
137
153
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
|
|
138
154
|
mkdir(outDirPath);
|
|
139
155
|
mkdir(audioSegmentDirPath);
|
|
140
|
-
graph_data.concurrency =
|
|
156
|
+
graph_data.concurrency = MulmoScriptMethods.getSpeechProvider(studio.script) === "nijivoice" ? 1 : 8;
|
|
141
157
|
const graph = new GraphAI(graph_data, {
|
|
142
158
|
...vanillaAgents,
|
|
143
159
|
fileWriteAgent,
|
package/lib/actions/images.js
CHANGED
|
@@ -7,42 +7,36 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
|
7
7
|
import imageGoogleAgent from "../agents/image_google_agent.js";
|
|
8
8
|
import imageOpenaiAgent from "../agents/image_openai_agent.js";
|
|
9
9
|
import { MulmoScriptMethods } from "../methods/index.js";
|
|
10
|
-
import {
|
|
10
|
+
import { imagePlugins } from "../utils/image_plugins/index.js";
|
|
11
11
|
const { default: __, ...vanillaAgents } = agents;
|
|
12
12
|
dotenv.config();
|
|
13
13
|
// const openai = new OpenAI();
|
|
14
14
|
import { GoogleAuth } from "google-auth-library";
|
|
15
|
+
const htmlStyle = (script, beat) => {
|
|
16
|
+
return {
|
|
17
|
+
canvasSize: MulmoScriptMethods.getCanvasSize(script),
|
|
18
|
+
textSlideStyle: MulmoScriptMethods.getTextSlideStyle(script, beat),
|
|
19
|
+
};
|
|
20
|
+
};
|
|
15
21
|
const imagePreprocessAgent = async (namedInputs) => {
|
|
16
22
|
const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
|
|
17
23
|
const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
|
|
18
|
-
const prompt = (beat.imagePrompt || beat.text) + "\n" + (imageParams.style || "");
|
|
19
24
|
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
|
|
20
|
-
const
|
|
21
|
-
|
|
25
|
+
const returnValue = {
|
|
26
|
+
aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
|
|
27
|
+
imageParams,
|
|
28
|
+
};
|
|
22
29
|
if (beat.image) {
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
await
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
await processMarkdown(processorParams);
|
|
30
|
-
}
|
|
31
|
-
else if (beat.image.type === "image") {
|
|
32
|
-
const path = processImage(processorParams);
|
|
33
|
-
if (path) {
|
|
34
|
-
// undefined prompt indicates that image generation is not needed
|
|
35
|
-
return { path, prompt: undefined, imageParams, aspectRatio };
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
else if (beat.image.type === "chart") {
|
|
39
|
-
await processChart(processorParams);
|
|
40
|
-
}
|
|
41
|
-
else if (beat.image.type === "mermaid") {
|
|
42
|
-
await processMermaid(processorParams);
|
|
30
|
+
const plugin = imagePlugins.find((plugin) => plugin.imageType === beat?.image?.type);
|
|
31
|
+
if (plugin) {
|
|
32
|
+
const processorParams = { beat, context, imagePath, ...htmlStyle(context.studio.script, beat) };
|
|
33
|
+
const path = await plugin.process(processorParams);
|
|
34
|
+
// undefined prompt indicates that image generation is not needed
|
|
35
|
+
return { path, ...returnValue };
|
|
43
36
|
}
|
|
44
37
|
}
|
|
45
|
-
|
|
38
|
+
const prompt = (beat.imagePrompt || beat.text) + "\n" + (imageParams.style || "");
|
|
39
|
+
return { path: imagePath, prompt, ...returnValue };
|
|
46
40
|
};
|
|
47
41
|
const graph_data = {
|
|
48
42
|
version: 0.5,
|
|
@@ -54,7 +48,7 @@ const graph_data = {
|
|
|
54
48
|
outputStudioFilePath: {},
|
|
55
49
|
map: {
|
|
56
50
|
agent: "mapAgent",
|
|
57
|
-
inputs: { rows: ":context.studio.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
|
|
51
|
+
inputs: { rows: ":context.studio.script.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
|
|
58
52
|
isResult: true,
|
|
59
53
|
params: {
|
|
60
54
|
rowKey: "beat",
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -1,2 +1,10 @@
|
|
|
1
|
-
import { MulmoStudioContext } from "../types/index.js";
|
|
1
|
+
import { MulmoStudioContext, MulmoCanvasDimension, BeatMediaType } from "../types/index.js";
|
|
2
|
+
export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType, duration: number, canvasInfo: MulmoCanvasDimension) => {
|
|
3
|
+
videoId: string;
|
|
4
|
+
videoPart: string;
|
|
5
|
+
};
|
|
6
|
+
export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
|
|
7
|
+
audioId: string;
|
|
8
|
+
audioPart: string;
|
|
9
|
+
};
|
|
2
10
|
export declare const movie: (context: MulmoStudioContext) => Promise<void>;
|