mulmocast 0.0.14 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/assets/html/pdf_handout.html +85 -0
- package/assets/html/pdf_slide.html +55 -0
- package/assets/html/pdf_talk.html +76 -0
- package/assets/templates/text_and_image.json +6 -0
- package/assets/templates/text_only.json +6 -0
- package/lib/actions/audio.d.ts +3 -1
- package/lib/actions/audio.js +84 -45
- package/lib/actions/captions.js +1 -1
- package/lib/actions/images.d.ts +89 -1
- package/lib/actions/images.js +160 -99
- package/lib/actions/movie.js +28 -21
- package/lib/actions/pdf.d.ts +1 -0
- package/lib/actions/pdf.js +134 -204
- package/lib/actions/translate.js +1 -1
- package/lib/agents/add_bgm_agent.js +3 -3
- package/lib/agents/combine_audio_files_agent.js +11 -9
- package/lib/agents/image_mock_agent.d.ts +4 -0
- package/lib/agents/image_mock_agent.js +18 -0
- package/lib/agents/index.d.ts +4 -1
- package/lib/agents/index.js +4 -1
- package/lib/agents/media_mock_agent.d.ts +4 -0
- package/lib/agents/media_mock_agent.js +18 -0
- package/lib/agents/tavily_agent.d.ts +15 -0
- package/lib/agents/tavily_agent.js +130 -0
- package/lib/agents/tts_openai_agent.js +9 -1
- package/lib/cli/commands/audio/builder.d.ts +4 -0
- package/lib/cli/commands/image/builder.d.ts +4 -0
- package/lib/cli/commands/movie/builder.d.ts +4 -0
- package/lib/cli/commands/pdf/builder.d.ts +4 -0
- package/lib/cli/commands/translate/builder.d.ts +4 -0
- package/lib/cli/common.d.ts +4 -0
- package/lib/cli/common.js +11 -0
- package/lib/cli/helpers.d.ts +5 -1
- package/lib/cli/helpers.js +19 -2
- package/lib/methods/index.d.ts +1 -1
- package/lib/methods/index.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +14 -0
- package/lib/methods/mulmo_presentation_style.js +70 -0
- package/lib/methods/mulmo_script.d.ts +1 -1
- package/lib/methods/mulmo_script.js +2 -2
- package/lib/methods/mulmo_studio_context.d.ts +14 -0
- package/lib/methods/mulmo_studio_context.js +20 -2
- package/lib/tools/deep_research.d.ts +2 -0
- package/lib/tools/deep_research.js +265 -0
- package/lib/types/schema.d.ts +31 -0
- package/lib/types/schema.js +1 -1
- package/lib/types/type.d.ts +4 -1
- package/lib/utils/ffmpeg_utils.d.ts +1 -0
- package/lib/utils/ffmpeg_utils.js +10 -0
- package/lib/utils/file.d.ts +1 -3
- package/lib/utils/file.js +4 -11
- package/lib/utils/filters.js +1 -0
- package/lib/utils/markdown.js +1 -1
- package/lib/utils/preprocess.js +1 -0
- package/lib/utils/prompt.d.ts +3 -0
- package/lib/utils/prompt.js +52 -0
- package/package.json +10 -10
- package/assets/font/NotoSansJP-Regular.ttf +0 -0
- package/assets/music/StarsBeyondEx.mp3 +0 -0
package/README.md
CHANGED
|
@@ -103,9 +103,13 @@ GOOGLE_PROJECT_ID=your_google_project_id
|
|
|
103
103
|
|
|
104
104
|
See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
|
|
105
105
|
|
|
106
|
-
#### (Optional) For
|
|
106
|
+
#### (Optional) For TTS models
|
|
107
107
|
```bash
|
|
108
|
+
# For Nijivoice TTS
|
|
108
109
|
NIJIVOICE_API_KEY=your_nijivoice_api_key
|
|
110
|
+
|
|
111
|
+
# For ElevenLabs TTS
|
|
112
|
+
ELEVENLABS_API_KEY=your_elevenlabs_api_key
|
|
109
113
|
```
|
|
110
114
|
|
|
111
115
|
#### (Optional) to access web in mulmo tool
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="${lang}">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>${title}</title>
|
|
7
|
+
<style>
|
|
8
|
+
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
|
|
9
|
+
|
|
10
|
+
* {
|
|
11
|
+
margin: 0;
|
|
12
|
+
padding: 0;
|
|
13
|
+
box-sizing: border-box;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
body {
|
|
17
|
+
font-family: 'Noto Sans JP', sans-serif;
|
|
18
|
+
font-size: 16px;
|
|
19
|
+
line-height: 1.6;
|
|
20
|
+
color: #333;
|
|
21
|
+
background: #fff;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
@page {
|
|
25
|
+
size: ${page_size};
|
|
26
|
+
margin: 0;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
.page {
|
|
30
|
+
page-break-after: always;
|
|
31
|
+
width: 100%;
|
|
32
|
+
height: 100vh;
|
|
33
|
+
position: relative;
|
|
34
|
+
overflow: hidden;
|
|
35
|
+
padding: 15px;
|
|
36
|
+
display: ${page_layout};
|
|
37
|
+
${page_direction}
|
|
38
|
+
gap: 15px;
|
|
39
|
+
background: #fff;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
.page:last-child {
|
|
43
|
+
page-break-after: avoid;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
img {
|
|
47
|
+
max-width: 100%;
|
|
48
|
+
max-height: 100%;
|
|
49
|
+
object-fit: contain;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
.handout-item {
|
|
53
|
+
display: flex;
|
|
54
|
+
flex-direction: ${flex_direction};
|
|
55
|
+
border: 1px solid #ddd;
|
|
56
|
+
overflow: hidden;
|
|
57
|
+
${item_flex}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
.handout-image {
|
|
61
|
+
${image_size}
|
|
62
|
+
display: flex;
|
|
63
|
+
align-items: center;
|
|
64
|
+
justify-content: center;
|
|
65
|
+
background: #f9f9f9;
|
|
66
|
+
padding: 5px;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
.handout-text {
|
|
70
|
+
${text_size}
|
|
71
|
+
padding: 8px;
|
|
72
|
+
font-size: 14px;
|
|
73
|
+
overflow: hidden;
|
|
74
|
+
background: #fff;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
.handout-text p {
|
|
78
|
+
margin: 0.3em 0;
|
|
79
|
+
}
|
|
80
|
+
</style>
|
|
81
|
+
</head>
|
|
82
|
+
<body>
|
|
83
|
+
${pages}
|
|
84
|
+
</body>
|
|
85
|
+
</html>
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="${lang}">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>${title}</title>
|
|
7
|
+
<style>
|
|
8
|
+
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
|
|
9
|
+
|
|
10
|
+
* {
|
|
11
|
+
margin: 0;
|
|
12
|
+
padding: 0;
|
|
13
|
+
box-sizing: border-box;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
body {
|
|
17
|
+
font-family: 'Noto Sans JP', sans-serif;
|
|
18
|
+
font-size: 14px;
|
|
19
|
+
line-height: 1.6;
|
|
20
|
+
color: #333;
|
|
21
|
+
background: #fff;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
@page {
|
|
25
|
+
size: ${page_size};
|
|
26
|
+
margin: 0;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
.page {
|
|
30
|
+
page-break-after: always;
|
|
31
|
+
width: 100%;
|
|
32
|
+
height: 100vh;
|
|
33
|
+
position: relative;
|
|
34
|
+
overflow: hidden;
|
|
35
|
+
display: flex;
|
|
36
|
+
align-items: center;
|
|
37
|
+
justify-content: center;
|
|
38
|
+
background: #fff;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
.page:last-child {
|
|
42
|
+
page-break-after: avoid;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
img {
|
|
46
|
+
max-width: 100%;
|
|
47
|
+
max-height: 100%;
|
|
48
|
+
object-fit: contain;
|
|
49
|
+
}
|
|
50
|
+
</style>
|
|
51
|
+
</head>
|
|
52
|
+
<body>
|
|
53
|
+
${pages}
|
|
54
|
+
</body>
|
|
55
|
+
</html>
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="${lang}">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>${title}</title>
|
|
7
|
+
<style>
|
|
8
|
+
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@400;700&display=swap');
|
|
9
|
+
|
|
10
|
+
* {
|
|
11
|
+
margin: 0;
|
|
12
|
+
padding: 0;
|
|
13
|
+
box-sizing: border-box;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
body {
|
|
17
|
+
font-family: 'Noto Sans JP', sans-serif;
|
|
18
|
+
font-size: 17px;
|
|
19
|
+
line-height: 1.4;
|
|
20
|
+
color: #333;
|
|
21
|
+
background: #fff;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
@page {
|
|
25
|
+
size: ${page_size};
|
|
26
|
+
margin: 0;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
.page {
|
|
30
|
+
page-break-after: always;
|
|
31
|
+
width: 100%;
|
|
32
|
+
height: 100vh;
|
|
33
|
+
position: relative;
|
|
34
|
+
overflow: hidden;
|
|
35
|
+
padding: 20px;
|
|
36
|
+
display: flex;
|
|
37
|
+
flex-direction: column;
|
|
38
|
+
background: #fff;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
.page:last-child {
|
|
42
|
+
page-break-after: avoid;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
img {
|
|
46
|
+
max-width: 100%;
|
|
47
|
+
max-height: 100%;
|
|
48
|
+
object-fit: contain;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
.image-container {
|
|
52
|
+
flex: 1;
|
|
53
|
+
display: flex;
|
|
54
|
+
align-items: center;
|
|
55
|
+
justify-content: center;
|
|
56
|
+
margin-bottom: 20px;
|
|
57
|
+
border: 1px solid #ddd;
|
|
58
|
+
background: #f9f9f9;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
.text-container {
|
|
62
|
+
padding: 10px;
|
|
63
|
+
background: #fff;
|
|
64
|
+
border-top: 2px solid #333;
|
|
65
|
+
min-height: 120px;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
.text-container p {
|
|
69
|
+
margin: 0.5em 0;
|
|
70
|
+
}
|
|
71
|
+
</style>
|
|
72
|
+
</head>
|
|
73
|
+
<body>
|
|
74
|
+
${pages}
|
|
75
|
+
</body>
|
|
76
|
+
</html>
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Text and Image",
|
|
3
|
+
"description": "Template for Text and Image Script.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the imagePrompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"scriptName": "image_prompts_template.json"
|
|
6
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Text Only",
|
|
3
|
+
"description": "Template for Text Only Script.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"scriptName": "text_only_template.json"
|
|
6
|
+
}
|
package/lib/actions/audio.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import type { CallbackFunction } from "graphai";
|
|
3
|
-
import { MulmoStudioContext } from "../types/index.js";
|
|
3
|
+
import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
|
|
4
|
+
export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
|
|
4
5
|
export declare const audioFilePath: (context: MulmoStudioContext) => string;
|
|
6
|
+
export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
5
7
|
export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
package/lib/actions/audio.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI } from "graphai";
|
|
3
|
+
import { TaskManager } from "graphai/lib/task_manager.js";
|
|
3
4
|
import * as agents from "@graphai/vanilla";
|
|
4
5
|
import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
5
6
|
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
@@ -8,9 +9,9 @@ import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
|
|
|
8
9
|
import ttsGoogleAgent from "../agents/tts_google_agent.js";
|
|
9
10
|
import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
|
|
10
11
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
|
-
import {
|
|
12
|
+
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
12
13
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
-
import { getAudioArtifactFilePath,
|
|
14
|
+
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
14
15
|
import { text2hash, localizedText } from "../utils/utils.js";
|
|
15
16
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
16
17
|
import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
|
|
@@ -22,8 +23,9 @@ const provider_to_agent = {
|
|
|
22
23
|
openai: "ttsOpenaiAgent",
|
|
23
24
|
google: "ttsGoogleAgent",
|
|
24
25
|
elevenlabs: "ttsElevenlabsAgent",
|
|
26
|
+
mock: "mediaMockAgent",
|
|
25
27
|
};
|
|
26
|
-
const getAudioPath = (context, beat, audioFile
|
|
28
|
+
const getAudioPath = (context, beat, audioFile) => {
|
|
27
29
|
if (beat.audio?.type === "audio") {
|
|
28
30
|
const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
|
|
29
31
|
if (path) {
|
|
@@ -31,37 +33,51 @@ const getAudioPath = (context, beat, audioFile, audioDirPath) => {
|
|
|
31
33
|
}
|
|
32
34
|
throw new Error("Invalid audio source");
|
|
33
35
|
}
|
|
34
|
-
if (beat.text === "") {
|
|
36
|
+
if (beat.text === undefined || beat.text === "") {
|
|
35
37
|
return undefined; // It indicates that the audio is not needed.
|
|
36
38
|
}
|
|
37
|
-
return
|
|
39
|
+
return audioFile;
|
|
40
|
+
};
|
|
41
|
+
const getAudioParam = (presentationStyle, beat) => {
|
|
42
|
+
const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
|
|
43
|
+
// Use speaker-specific provider if available, otherwise fall back to script-level provider
|
|
44
|
+
const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
|
|
45
|
+
const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
|
|
46
|
+
return { voiceId, provider, speechOptions };
|
|
47
|
+
};
|
|
48
|
+
export const getBeatAudioPath = (text, context, beat, lang) => {
|
|
49
|
+
const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
|
|
50
|
+
const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
|
|
51
|
+
const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
|
|
52
|
+
const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
|
|
53
|
+
const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
|
|
54
|
+
return getAudioPath(context, beat, audioFile);
|
|
38
55
|
};
|
|
39
56
|
const preprocessor = (namedInputs) => {
|
|
40
|
-
const { beat, studioBeat, multiLingual, context
|
|
41
|
-
const { lang } = context;
|
|
42
|
-
const speaker = context.studio.script.speechParams.speakers[beat.speaker];
|
|
43
|
-
const voiceId = speaker.voiceId;
|
|
44
|
-
const speechOptions = MulmoScriptMethods.getSpeechOptions(context.studio.script, beat);
|
|
57
|
+
const { beat, studioBeat, multiLingual, context } = namedInputs;
|
|
58
|
+
const { lang, presentationStyle } = context;
|
|
45
59
|
const text = localizedText(beat, multiLingual, lang);
|
|
46
|
-
|
|
47
|
-
const
|
|
48
|
-
const hash_string = `${text}${voiceId}${speechOptions?.instruction ?? ""}${speechOptions?.speed ?? 1.0}${provider}`;
|
|
49
|
-
const audioFile = `${context.studio.filename}_${text2hash(hash_string)}` + (lang ? `_${lang}` : "");
|
|
50
|
-
const audioPath = getAudioPath(context, beat, audioFile, audioDirPath);
|
|
60
|
+
const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
|
|
61
|
+
const audioPath = getBeatAudioPath(text, context, beat, lang);
|
|
51
62
|
studioBeat.audioFile = audioPath;
|
|
52
63
|
const needsTTS = !beat.audio && audioPath !== undefined;
|
|
53
64
|
return {
|
|
54
65
|
ttsAgent: provider_to_agent[provider],
|
|
55
|
-
|
|
66
|
+
text,
|
|
56
67
|
voiceId,
|
|
57
68
|
speechOptions,
|
|
58
69
|
audioPath,
|
|
59
|
-
|
|
70
|
+
studioBeat,
|
|
60
71
|
needsTTS,
|
|
61
72
|
};
|
|
62
73
|
};
|
|
63
74
|
const graph_tts = {
|
|
64
75
|
nodes: {
|
|
76
|
+
beat: {},
|
|
77
|
+
studioBeat: {},
|
|
78
|
+
multiLingual: {},
|
|
79
|
+
context: {},
|
|
80
|
+
__mapIndex: {},
|
|
65
81
|
preprocessor: {
|
|
66
82
|
agent: preprocessor,
|
|
67
83
|
inputs: {
|
|
@@ -69,7 +85,6 @@ const graph_tts = {
|
|
|
69
85
|
studioBeat: ":studioBeat",
|
|
70
86
|
multiLingual: ":multiLingual",
|
|
71
87
|
context: ":context",
|
|
72
|
-
audioDirPath: ":audioDirPath",
|
|
73
88
|
},
|
|
74
89
|
},
|
|
75
90
|
tts: {
|
|
@@ -99,8 +114,6 @@ const graph_data = {
|
|
|
99
114
|
audioArtifactFilePath: {},
|
|
100
115
|
audioCombinedFilePath: {},
|
|
101
116
|
outputStudioFilePath: {},
|
|
102
|
-
audioDirPath: {},
|
|
103
|
-
audioSegmentDirPath: {},
|
|
104
117
|
musicFile: {},
|
|
105
118
|
map: {
|
|
106
119
|
agent: "mapAgent",
|
|
@@ -108,8 +121,6 @@ const graph_data = {
|
|
|
108
121
|
rows: ":context.studio.script.beats",
|
|
109
122
|
studioBeat: ":context.studio.beats",
|
|
110
123
|
multiLingual: ":context.studio.multiLingual",
|
|
111
|
-
audioDirPath: ":audioDirPath",
|
|
112
|
-
audioSegmentDirPath: ":audioSegmentDirPath",
|
|
113
124
|
context: ":context",
|
|
114
125
|
},
|
|
115
126
|
params: {
|
|
@@ -121,7 +132,7 @@ const graph_data = {
|
|
|
121
132
|
combineFiles: {
|
|
122
133
|
agent: "combineAudioFilesAgent",
|
|
123
134
|
inputs: {
|
|
124
|
-
|
|
135
|
+
onComplete: ":map",
|
|
125
136
|
context: ":context",
|
|
126
137
|
combinedFileName: ":audioCombinedFilePath",
|
|
127
138
|
},
|
|
@@ -140,7 +151,7 @@ const graph_data = {
|
|
|
140
151
|
wait: ":combineFiles",
|
|
141
152
|
voiceFile: ":audioCombinedFilePath",
|
|
142
153
|
outputFile: ":audioArtifactFilePath",
|
|
143
|
-
|
|
154
|
+
context: ":context",
|
|
144
155
|
params: {
|
|
145
156
|
musicFile: ":musicFile",
|
|
146
157
|
},
|
|
@@ -171,40 +182,68 @@ export const audioFilePath = (context) => {
|
|
|
171
182
|
const { outDirPath } = fileDirs;
|
|
172
183
|
return getAudioArtifactFilePath(outDirPath, studio.filename);
|
|
173
184
|
};
|
|
185
|
+
const getConcurrency = (context) => {
|
|
186
|
+
// Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
|
|
187
|
+
const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
|
|
188
|
+
const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
|
|
189
|
+
return provider === "nijivoice" || provider === "elevenlabs";
|
|
190
|
+
});
|
|
191
|
+
return hasLimitedConcurrencyProvider ? 1 : 8;
|
|
192
|
+
};
|
|
193
|
+
const audioAgents = {
|
|
194
|
+
...vanillaAgents,
|
|
195
|
+
fileWriteAgent,
|
|
196
|
+
ttsOpenaiAgent,
|
|
197
|
+
ttsNijivoiceAgent,
|
|
198
|
+
ttsGoogleAgent,
|
|
199
|
+
ttsElevenlabsAgent,
|
|
200
|
+
addBGMAgent,
|
|
201
|
+
combineAudioFilesAgent,
|
|
202
|
+
};
|
|
203
|
+
export const generateBeatAudio = async (index, context, callbacks) => {
|
|
204
|
+
try {
|
|
205
|
+
MulmoStudioContextMethods.setSessionState(context, "audio", true);
|
|
206
|
+
const { studio, fileDirs } = context;
|
|
207
|
+
const { outDirPath, audioDirPath } = fileDirs;
|
|
208
|
+
const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
|
|
209
|
+
mkdir(outDirPath);
|
|
210
|
+
mkdir(audioSegmentDirPath);
|
|
211
|
+
const taskManager = new TaskManager(getConcurrency(context));
|
|
212
|
+
const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
|
|
213
|
+
graph.injectValue("__mapIndex", index);
|
|
214
|
+
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
215
|
+
graph.injectValue("studioBeat", context.studio.beats[index]);
|
|
216
|
+
graph.injectValue("multiLingual", context.studio.multiLingual);
|
|
217
|
+
graph.injectValue("context", context);
|
|
218
|
+
if (callbacks) {
|
|
219
|
+
callbacks.forEach((callback) => {
|
|
220
|
+
graph.registerCallback(callback);
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
await graph.run();
|
|
224
|
+
}
|
|
225
|
+
finally {
|
|
226
|
+
MulmoStudioContextMethods.setSessionState(context, "audio", false);
|
|
227
|
+
}
|
|
228
|
+
};
|
|
174
229
|
export const audio = async (context, callbacks) => {
|
|
175
230
|
try {
|
|
176
231
|
MulmoStudioContextMethods.setSessionState(context, "audio", true);
|
|
177
232
|
const { studio, fileDirs, lang } = context;
|
|
178
233
|
const { outDirPath, audioDirPath } = fileDirs;
|
|
179
234
|
const audioArtifactFilePath = audioFilePath(context);
|
|
180
|
-
const audioSegmentDirPath =
|
|
181
|
-
const audioCombinedFilePath =
|
|
235
|
+
const audioSegmentDirPath = resolveDirPath(audioDirPath, studio.filename);
|
|
236
|
+
const audioCombinedFilePath = getAudioFilePath(audioDirPath, studio.filename, studio.filename, lang);
|
|
182
237
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
|
|
183
238
|
mkdir(outDirPath);
|
|
184
239
|
mkdir(audioSegmentDirPath);
|
|
185
|
-
|
|
186
|
-
const
|
|
187
|
-
const provider = speaker.provider ?? studio.script.speechParams.provider;
|
|
188
|
-
return provider === "nijivoice" || provider === "elevenlabs";
|
|
189
|
-
});
|
|
190
|
-
graph_data.concurrency = hasLimitedConcurrencyProvider ? 1 : 8;
|
|
191
|
-
const graph = new GraphAI(graph_data, {
|
|
192
|
-
...vanillaAgents,
|
|
193
|
-
fileWriteAgent,
|
|
194
|
-
ttsOpenaiAgent,
|
|
195
|
-
ttsNijivoiceAgent,
|
|
196
|
-
ttsGoogleAgent,
|
|
197
|
-
ttsElevenlabsAgent,
|
|
198
|
-
addBGMAgent,
|
|
199
|
-
combineAudioFilesAgent,
|
|
200
|
-
}, { agentFilters });
|
|
240
|
+
const taskManager = new TaskManager(getConcurrency(context));
|
|
241
|
+
const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
|
|
201
242
|
graph.injectValue("context", context);
|
|
202
243
|
graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
|
|
203
244
|
graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
|
|
204
245
|
graph.injectValue("outputStudioFilePath", outputStudioFilePath);
|
|
205
|
-
graph.injectValue("
|
|
206
|
-
graph.injectValue("audioDirPath", audioDirPath);
|
|
207
|
-
graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(studio.script.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
|
|
246
|
+
graph.injectValue("musicFile", MulmoMediaSourceMethods.resolve(context.presentationStyle.audioParams.bgm, context) ?? process.env.PATH_BGM ?? defaultBGMPath());
|
|
208
247
|
if (callbacks) {
|
|
209
248
|
callbacks.forEach((callback) => {
|
|
210
249
|
graph.registerCallback(callback);
|
package/lib/actions/captions.js
CHANGED
|
@@ -26,7 +26,7 @@ const graph_data = {
|
|
|
26
26
|
const { fileDirs } = namedInputs.context;
|
|
27
27
|
const { caption } = context;
|
|
28
28
|
const { imageDirPath } = fileDirs;
|
|
29
|
-
const { canvasSize } = context.
|
|
29
|
+
const { canvasSize } = context.presentationStyle;
|
|
30
30
|
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}_caption.png`;
|
|
31
31
|
const template = getHTMLFile("caption");
|
|
32
32
|
const text = (() => {
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -1,3 +1,91 @@
|
|
|
1
1
|
import type { CallbackFunction } from "graphai";
|
|
2
|
-
import { MulmoStudioContext } from "../types/index.js";
|
|
2
|
+
import { MulmoStudioContext, MulmoBeat, Text2ImageAgentInfo } from "../types/index.js";
|
|
3
|
+
export declare const imagePreprocessAgent: (namedInputs: {
|
|
4
|
+
context: MulmoStudioContext;
|
|
5
|
+
beat: MulmoBeat;
|
|
6
|
+
index: number;
|
|
7
|
+
suffix: string;
|
|
8
|
+
imageDirPath: string;
|
|
9
|
+
imageAgentInfo: Text2ImageAgentInfo;
|
|
10
|
+
imageRefs: Record<string, string>;
|
|
11
|
+
}) => Promise<{
|
|
12
|
+
imageParams: {
|
|
13
|
+
model?: string | undefined;
|
|
14
|
+
style?: string | undefined;
|
|
15
|
+
moderation?: string | undefined;
|
|
16
|
+
images?: Record<string, {
|
|
17
|
+
type: "image";
|
|
18
|
+
source: {
|
|
19
|
+
url: string;
|
|
20
|
+
kind: "url";
|
|
21
|
+
} | {
|
|
22
|
+
kind: "base64";
|
|
23
|
+
data: string;
|
|
24
|
+
} | {
|
|
25
|
+
text: string;
|
|
26
|
+
kind: "text";
|
|
27
|
+
} | {
|
|
28
|
+
path: string;
|
|
29
|
+
kind: "path";
|
|
30
|
+
};
|
|
31
|
+
}> | undefined;
|
|
32
|
+
};
|
|
33
|
+
movieFile: string | undefined;
|
|
34
|
+
imagePath: string | undefined;
|
|
35
|
+
referenceImage: string | undefined;
|
|
36
|
+
} | {
|
|
37
|
+
imagePath: string;
|
|
38
|
+
images: string[];
|
|
39
|
+
imageFromMovie: boolean;
|
|
40
|
+
imageParams: {
|
|
41
|
+
model?: string | undefined;
|
|
42
|
+
style?: string | undefined;
|
|
43
|
+
moderation?: string | undefined;
|
|
44
|
+
images?: Record<string, {
|
|
45
|
+
type: "image";
|
|
46
|
+
source: {
|
|
47
|
+
url: string;
|
|
48
|
+
kind: "url";
|
|
49
|
+
} | {
|
|
50
|
+
kind: "base64";
|
|
51
|
+
data: string;
|
|
52
|
+
} | {
|
|
53
|
+
text: string;
|
|
54
|
+
kind: "text";
|
|
55
|
+
} | {
|
|
56
|
+
path: string;
|
|
57
|
+
kind: "path";
|
|
58
|
+
};
|
|
59
|
+
}> | undefined;
|
|
60
|
+
};
|
|
61
|
+
movieFile: string | undefined;
|
|
62
|
+
} | {
|
|
63
|
+
images: string[];
|
|
64
|
+
imageParams: {
|
|
65
|
+
model?: string | undefined;
|
|
66
|
+
style?: string | undefined;
|
|
67
|
+
moderation?: string | undefined;
|
|
68
|
+
images?: Record<string, {
|
|
69
|
+
type: "image";
|
|
70
|
+
source: {
|
|
71
|
+
url: string;
|
|
72
|
+
kind: "url";
|
|
73
|
+
} | {
|
|
74
|
+
kind: "base64";
|
|
75
|
+
data: string;
|
|
76
|
+
} | {
|
|
77
|
+
text: string;
|
|
78
|
+
kind: "text";
|
|
79
|
+
} | {
|
|
80
|
+
path: string;
|
|
81
|
+
kind: "path";
|
|
82
|
+
};
|
|
83
|
+
}> | undefined;
|
|
84
|
+
};
|
|
85
|
+
movieFile: string | undefined;
|
|
86
|
+
imagePath: string;
|
|
87
|
+
referenceImage: string;
|
|
88
|
+
prompt: string;
|
|
89
|
+
}>;
|
|
3
90
|
export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
91
|
+
export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|