mulmocast 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/akira_comic.json +28 -0
- package/assets/templates/children_book.json +13 -0
- package/assets/templates/comic_strips.json +14 -1
- package/assets/templates/drslump_comic.json +28 -0
- package/assets/templates/ghibli_comic.json +28 -0
- package/assets/templates/ghost_comic.json +35 -0
- package/assets/templates/onepiece_comic.json +28 -0
- package/assets/templates/portrait_movie.json +28 -0
- package/assets/templates/realistic_movie.json +28 -0
- package/assets/templates/sensei_and_taro.json +21 -0
- package/lib/actions/audio.js +1 -1
- package/lib/actions/captions.js +1 -1
- package/lib/actions/images.js +98 -13
- package/lib/actions/movie.d.ts +1 -1
- package/lib/actions/movie.js +13 -11
- package/lib/actions/pdf.js +6 -4
- package/lib/actions/translate.js +1 -1
- package/lib/agents/image_openai_agent.d.ts +1 -0
- package/lib/agents/image_openai_agent.js +16 -4
- package/lib/agents/movie_google_agent.d.ts +17 -0
- package/lib/agents/movie_google_agent.js +114 -0
- package/lib/cli/bin.js +19 -0
- package/lib/cli/helpers.js +2 -1
- package/lib/methods/mulmo_studio.d.ts +1 -1
- package/lib/tools/create_mulmo_script_from_url.js +1 -1
- package/lib/tools/create_mulmo_script_interactively.js +1 -1
- package/lib/tools/story_to_script.js +1 -1
- package/lib/types/schema.d.ts +1966 -322
- package/lib/types/schema.js +21 -3
- package/lib/types/type.d.ts +3 -1
- package/lib/utils/file.js +20 -9
- package/lib/utils/pdf.d.ts +1 -0
- package/lib/utils/pdf.js +5 -3
- package/lib/utils/preprocess.d.ts +57 -16
- package/lib/utils/utils.d.ts +1 -0
- package/lib/utils/utils.js +3 -0
- package/package.json +9 -9
- package/scripts/templates/children_book.json +0 -7
- package/scripts/templates/image_prompts_template.json +41 -0
- package/scripts/templates/movie_prompts_template.json +50 -0
- package/scripts/templates/sensei_and_taro.json +0 -11
- package/scripts/templates/text_only_template.json +35 -0
- package/assets/templates/ghibli_strips.json +0 -6
- package/scripts/templates/comic_strips.json +0 -30
- package/scripts/templates/ghibli_strips.json +0 -30
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Akira style",
|
|
3
|
+
"description": "Template for Akira style comic presentation.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>AKIRA aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"girl": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/akira_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -2,5 +2,18 @@
|
|
|
2
2
|
"title": "Children Book",
|
|
3
3
|
"description": "Template for children book.",
|
|
4
4
|
"systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must haven an image prompt appropriate for the text.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
|
|
16
|
+
}
|
|
17
|
+
},
|
|
5
18
|
"scriptName": "children_book.json"
|
|
6
19
|
}
|
|
@@ -2,5 +2,18 @@
|
|
|
2
2
|
"title": "American Comic Strips",
|
|
3
3
|
"description": "Template for Dilbert-style comic strips.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
-
"
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"scriptName": "text_only_template.json"
|
|
6
19
|
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Dr. Slump Style",
|
|
3
|
+
"description": "Template for Dr. Slump style comic presentation.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"girl": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/slump_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Ghibli comic style",
|
|
3
|
+
"description": "Template for Ghibli-style comic presentation.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Ghibli style</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Ghost in the shell style",
|
|
3
|
+
"description": "Template for Ghost in the shell style comic presentation.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Ghost in the shell aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"optimus": {
|
|
25
|
+
"type": "image",
|
|
26
|
+
"source": {
|
|
27
|
+
"kind": "url",
|
|
28
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"scriptName": "image_prompts_template.json"
|
|
35
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "One Piece style",
|
|
3
|
+
"description": "Template for One Piece style comic presentation.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>One Piece aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/onepiece_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Photo realistic movie (portrait)",
|
|
3
|
+
"description": "Template for photo realistic movie in portrait mode.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1024,
|
|
12
|
+
"height": 1536
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Photo realistic, cinematic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "movie_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Photo realistic movie template",
|
|
3
|
+
"description": "Template for photo realistic movie.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Photo realistic, cinematic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/female_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "movie_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -2,5 +2,26 @@
|
|
|
2
2
|
"title": "Student and Teacher",
|
|
3
3
|
"description": "Interactive discussion between a student and teacher",
|
|
4
4
|
"systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
|
|
16
|
+
},
|
|
17
|
+
"speechParams": {
|
|
18
|
+
"provider": "nijivoice",
|
|
19
|
+
"speakers": {
|
|
20
|
+
"Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62" },
|
|
21
|
+
"Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
|
|
22
|
+
"Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
},
|
|
5
26
|
"scriptName": "sensei_and_taro.json"
|
|
6
27
|
}
|
package/lib/actions/audio.js
CHANGED
|
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
|
12
12
|
import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
|
|
13
13
|
import { text2hash, localizedText } from "../utils/utils.js";
|
|
14
14
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
15
|
-
const
|
|
15
|
+
const vanillaAgents = agents.default ?? agents;
|
|
16
16
|
// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
|
|
17
17
|
// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
|
|
18
18
|
const provider_to_agent = {
|
package/lib/actions/captions.js
CHANGED
|
@@ -3,7 +3,7 @@ import * as agents from "@graphai/vanilla";
|
|
|
3
3
|
import { getHTMLFile } from "../utils/file.js";
|
|
4
4
|
import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
|
|
5
5
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
6
|
-
const
|
|
6
|
+
const vanillaAgents = agents.default ?? agents;
|
|
7
7
|
const graph_data = {
|
|
8
8
|
version: 0.5,
|
|
9
9
|
nodes: {
|
package/lib/actions/images.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
|
+
import fs from "fs";
|
|
2
3
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
3
4
|
import * as agents from "@graphai/vanilla";
|
|
4
5
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
@@ -6,10 +7,11 @@ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
|
6
7
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
7
8
|
import imageGoogleAgent from "../agents/image_google_agent.js";
|
|
8
9
|
import imageOpenaiAgent from "../agents/image_openai_agent.js";
|
|
9
|
-
import
|
|
10
|
+
import movieGoogleAgent from "../agents/movie_google_agent.js";
|
|
11
|
+
import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
10
12
|
import { imagePlugins } from "../utils/image_plugins/index.js";
|
|
11
13
|
import { imagePrompt } from "../utils/prompt.js";
|
|
12
|
-
const
|
|
14
|
+
const vanillaAgents = agents.default ?? agents;
|
|
13
15
|
dotenv.config();
|
|
14
16
|
// const openai = new OpenAI();
|
|
15
17
|
import { GoogleAuth } from "google-auth-library";
|
|
@@ -21,8 +23,12 @@ const htmlStyle = (script, beat) => {
|
|
|
21
23
|
};
|
|
22
24
|
};
|
|
23
25
|
const imagePreprocessAgent = async (namedInputs) => {
|
|
24
|
-
const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
|
|
26
|
+
const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
|
|
25
27
|
const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
|
|
28
|
+
if (!imageParams.size) {
|
|
29
|
+
const canvasSize = MulmoScriptMethods.getCanvasSize(context.studio.script);
|
|
30
|
+
imageParams.size = `${canvasSize.width}x${canvasSize.height}`;
|
|
31
|
+
}
|
|
26
32
|
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
|
|
27
33
|
const returnValue = {
|
|
28
34
|
aspectRatio: MulmoScriptMethods.getAspectRatio(context.studio.script),
|
|
@@ -44,7 +50,12 @@ const imagePreprocessAgent = async (namedInputs) => {
|
|
|
44
50
|
}
|
|
45
51
|
}
|
|
46
52
|
const prompt = imagePrompt(beat, imageParams.style);
|
|
47
|
-
|
|
53
|
+
const images = (() => {
|
|
54
|
+
const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
|
|
55
|
+
const sources = imageNames.map((name) => imageRefs[name]);
|
|
56
|
+
return sources.filter((source) => source !== undefined);
|
|
57
|
+
})();
|
|
58
|
+
return { path: imagePath, prompt, ...returnValue, images };
|
|
48
59
|
};
|
|
49
60
|
const graph_data = {
|
|
50
61
|
version: 0.5,
|
|
@@ -54,9 +65,16 @@ const graph_data = {
|
|
|
54
65
|
imageDirPath: {},
|
|
55
66
|
imageAgentInfo: {},
|
|
56
67
|
outputStudioFilePath: {},
|
|
68
|
+
imageRefs: {},
|
|
57
69
|
map: {
|
|
58
70
|
agent: "mapAgent",
|
|
59
|
-
inputs: {
|
|
71
|
+
inputs: {
|
|
72
|
+
rows: ":context.studio.script.beats",
|
|
73
|
+
context: ":context",
|
|
74
|
+
imageAgentInfo: ":imageAgentInfo",
|
|
75
|
+
imageDirPath: ":imageDirPath",
|
|
76
|
+
imageRefs: ":imageRefs",
|
|
77
|
+
},
|
|
60
78
|
isResult: true,
|
|
61
79
|
params: {
|
|
62
80
|
rowKey: "beat",
|
|
@@ -73,6 +91,7 @@ const graph_data = {
|
|
|
73
91
|
suffix: "p",
|
|
74
92
|
imageDirPath: ":imageDirPath",
|
|
75
93
|
imageAgentInfo: ":imageAgentInfo",
|
|
94
|
+
imageRefs: ":imageRefs",
|
|
76
95
|
},
|
|
77
96
|
},
|
|
78
97
|
imageGenerator: {
|
|
@@ -92,6 +111,43 @@ const graph_data = {
|
|
|
92
111
|
size: ":preprocessor.imageParams.size",
|
|
93
112
|
moderation: ":preprocessor.imageParams.moderation",
|
|
94
113
|
aspectRatio: ":preprocessor.aspectRatio",
|
|
114
|
+
images: ":preprocessor.images",
|
|
115
|
+
},
|
|
116
|
+
},
|
|
117
|
+
defaultValue: {},
|
|
118
|
+
},
|
|
119
|
+
prepareMovie: {
|
|
120
|
+
agent: (namedInputs) => {
|
|
121
|
+
const { beat, imageDirPath, index, context } = namedInputs;
|
|
122
|
+
if (beat.moviePrompt) {
|
|
123
|
+
const movieFile = `${imageDirPath}/${context.studio.filename}/${index}.mov`;
|
|
124
|
+
return { movieFile };
|
|
125
|
+
}
|
|
126
|
+
return {};
|
|
127
|
+
},
|
|
128
|
+
inputs: {
|
|
129
|
+
result: ":imageGenerator", // to wait for imageGenerator to finish
|
|
130
|
+
imagePath: ":preprocessor.path",
|
|
131
|
+
beat: ":beat",
|
|
132
|
+
imageDirPath: ":imageDirPath",
|
|
133
|
+
index: ":__mapIndex",
|
|
134
|
+
context: ":context",
|
|
135
|
+
},
|
|
136
|
+
},
|
|
137
|
+
movieGenerator: {
|
|
138
|
+
if: ":prepareMovie.movieFile",
|
|
139
|
+
agent: "movieGoogleAgent",
|
|
140
|
+
inputs: {
|
|
141
|
+
prompt: ":beat.moviePrompt",
|
|
142
|
+
imagePath: ":preprocessor.path",
|
|
143
|
+
file: ":prepareMovie.movieFile",
|
|
144
|
+
studio: ":context.studio", // for cache
|
|
145
|
+
index: ":__mapIndex", // for cache
|
|
146
|
+
sessionType: "movie", // for cache
|
|
147
|
+
params: {
|
|
148
|
+
model: ":context.studio.script.movieParams.model",
|
|
149
|
+
aspectRatio: ":preprocessor.aspectRatio",
|
|
150
|
+
duration: ":beat.duration",
|
|
95
151
|
},
|
|
96
152
|
},
|
|
97
153
|
defaultValue: {},
|
|
@@ -99,11 +155,9 @@ const graph_data = {
|
|
|
99
155
|
output: {
|
|
100
156
|
agent: "copyAgent",
|
|
101
157
|
inputs: {
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
output: {
|
|
106
|
-
imageFile: ".image",
|
|
158
|
+
onComplete: ":movieGenerator",
|
|
159
|
+
imageFile: ":preprocessor.path",
|
|
160
|
+
movieFile: ":prepareMovie.movieFile",
|
|
107
161
|
},
|
|
108
162
|
isResult: true,
|
|
109
163
|
},
|
|
@@ -152,7 +206,7 @@ const generateImages = async (context) => {
|
|
|
152
206
|
{
|
|
153
207
|
name: "fileCacheAgentFilter",
|
|
154
208
|
agent: fileCacheAgentFilter,
|
|
155
|
-
nodeIds: ["imageGenerator"],
|
|
209
|
+
nodeIds: ["imageGenerator", "movieGenerator"],
|
|
156
210
|
},
|
|
157
211
|
];
|
|
158
212
|
const options = {
|
|
@@ -160,7 +214,7 @@ const generateImages = async (context) => {
|
|
|
160
214
|
};
|
|
161
215
|
const imageAgentInfo = MulmoScriptMethods.getImageAgentInfo(studio.script);
|
|
162
216
|
// We need to get google's auth token only if the google is the text2image provider.
|
|
163
|
-
if (imageAgentInfo.provider === "google") {
|
|
217
|
+
if (imageAgentInfo.provider === "google" || studio.script.movieParams?.provider === "google") {
|
|
164
218
|
GraphAILogger.log("google was specified as text2image engine");
|
|
165
219
|
const token = await googleAuth();
|
|
166
220
|
options.config = {
|
|
@@ -168,16 +222,47 @@ const generateImages = async (context) => {
|
|
|
168
222
|
projectId: process.env.GOOGLE_PROJECT_ID,
|
|
169
223
|
token,
|
|
170
224
|
},
|
|
225
|
+
movieGoogleAgent: {
|
|
226
|
+
projectId: process.env.GOOGLE_PROJECT_ID,
|
|
227
|
+
token,
|
|
228
|
+
},
|
|
171
229
|
};
|
|
172
230
|
}
|
|
231
|
+
if (imageAgentInfo.provider === "openai") {
|
|
232
|
+
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
233
|
+
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
234
|
+
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
235
|
+
graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
|
|
236
|
+
}
|
|
237
|
+
const imageRefs = {};
|
|
238
|
+
const images = studio.script.imageParams?.images;
|
|
239
|
+
if (images) {
|
|
240
|
+
await Promise.all(Object.keys(images).map(async (key) => {
|
|
241
|
+
const image = images[key];
|
|
242
|
+
if (image.source.kind === "path") {
|
|
243
|
+
imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
|
|
244
|
+
}
|
|
245
|
+
else if (image.source.kind === "url") {
|
|
246
|
+
const response = await fetch(image.source.url);
|
|
247
|
+
if (!response.ok) {
|
|
248
|
+
throw new Error(`Failed to download image: ${image.source.url}`);
|
|
249
|
+
}
|
|
250
|
+
const buffer = Buffer.from(await response.arrayBuffer());
|
|
251
|
+
const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
|
|
252
|
+
await fs.promises.writeFile(imagePath, buffer);
|
|
253
|
+
imageRefs[key] = imagePath;
|
|
254
|
+
}
|
|
255
|
+
}));
|
|
256
|
+
}
|
|
173
257
|
GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
|
|
174
258
|
const injections = {
|
|
175
259
|
context,
|
|
176
260
|
imageAgentInfo,
|
|
177
261
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
|
|
178
262
|
imageDirPath,
|
|
263
|
+
imageRefs,
|
|
179
264
|
};
|
|
180
|
-
const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
|
|
265
|
+
const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, movieGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
|
|
181
266
|
Object.keys(injections).forEach((key) => {
|
|
182
267
|
graph.injectValue(key, injections[key]);
|
|
183
268
|
});
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType
|
|
|
3
3
|
videoId: string;
|
|
4
4
|
videoPart: string;
|
|
5
5
|
};
|
|
6
|
-
export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
|
|
6
|
+
export declare const getAudioPart: (inputIndex: number, duration: number, delay: number, mixAudio: number) => {
|
|
7
7
|
audioId: string;
|
|
8
8
|
audioPart: string;
|
|
9
9
|
};
|
package/lib/actions/movie.js
CHANGED
|
@@ -26,13 +26,14 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
|
|
|
26
26
|
videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
|
|
27
27
|
};
|
|
28
28
|
};
|
|
29
|
-
export const getAudioPart = (inputIndex, duration, delay) => {
|
|
29
|
+
export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
|
|
30
30
|
const audioId = `a${inputIndex}`;
|
|
31
31
|
return {
|
|
32
32
|
audioId,
|
|
33
33
|
audioPart: `[${inputIndex}:a]` +
|
|
34
34
|
`atrim=duration=${duration},` + // Trim to beat duration
|
|
35
35
|
`adelay=${delay * 1000}|${delay * 1000},` +
|
|
36
|
+
`volume=${mixAudio},` + // 👈 add this line
|
|
36
37
|
`aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
|
|
37
38
|
`[${audioId}]`,
|
|
38
39
|
};
|
|
@@ -68,12 +69,13 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
68
69
|
// Add each image input
|
|
69
70
|
const filterComplexVideoIds = [];
|
|
70
71
|
const filterComplexAudioIds = [];
|
|
71
|
-
studio.beats.reduce((timestamp,
|
|
72
|
-
|
|
73
|
-
|
|
72
|
+
studio.beats.reduce((timestamp, studioBeat, index) => {
|
|
73
|
+
const beat = studio.script.beats[index];
|
|
74
|
+
if (!studioBeat.imageFile || !studioBeat.duration) {
|
|
75
|
+
throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
|
|
74
76
|
}
|
|
75
|
-
const inputIndex = FfmpegContextAddInput(ffmpegContext,
|
|
76
|
-
const mediaType = MulmoScriptMethods.getImageType(studio.script,
|
|
77
|
+
const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.movieFile ?? studioBeat.imageFile);
|
|
78
|
+
const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
|
|
77
79
|
const extraPadding = (() => {
|
|
78
80
|
// We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
|
|
79
81
|
if (index === 0) {
|
|
@@ -84,11 +86,11 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
84
86
|
}
|
|
85
87
|
return 0;
|
|
86
88
|
})();
|
|
87
|
-
const duration =
|
|
89
|
+
const duration = studioBeat.duration + extraPadding;
|
|
88
90
|
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
|
|
89
91
|
ffmpegContext.filterComplex.push(videoPart);
|
|
90
|
-
if (caption &&
|
|
91
|
-
const captionInputIndex = FfmpegContextAddInput(ffmpegContext,
|
|
92
|
+
if (caption && studioBeat.captionFile) {
|
|
93
|
+
const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
|
|
92
94
|
const compositeVideoId = `c${index}`;
|
|
93
95
|
ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
|
|
94
96
|
filterComplexVideoIds.push(compositeVideoId);
|
|
@@ -96,8 +98,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
96
98
|
else {
|
|
97
99
|
filterComplexVideoIds.push(videoId);
|
|
98
100
|
}
|
|
99
|
-
if (
|
|
100
|
-
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
|
|
101
|
+
if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
|
|
102
|
+
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
|
|
101
103
|
filterComplexAudioIds.push(audioId);
|
|
102
104
|
ffmpegContext.filterComplex.push(audioPart);
|
|
103
105
|
}
|
package/lib/actions/pdf.js
CHANGED
|
@@ -102,19 +102,20 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
|
|
|
102
102
|
const pos = (() => {
|
|
103
103
|
if (isLandscapeImage) {
|
|
104
104
|
const cellHeight = pageHeight / imagesPerPage - offset;
|
|
105
|
-
const { drawWidth, drawHeight } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
|
|
106
|
-
const x = offset;
|
|
105
|
+
const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
|
|
106
|
+
const x = offset + (containerWidth - drawWidth) / 2;
|
|
107
107
|
const y = pageHeight - (i + 1) * cellHeight + (cellHeight - drawHeight) * handoutImageRatio;
|
|
108
108
|
return {
|
|
109
109
|
x,
|
|
110
110
|
y,
|
|
111
111
|
width: drawWidth,
|
|
112
112
|
height: drawHeight,
|
|
113
|
+
containerWidth,
|
|
113
114
|
};
|
|
114
115
|
}
|
|
115
116
|
else {
|
|
116
117
|
const cellWidth = pageWidth / imagesPerPage;
|
|
117
|
-
const { drawWidth, drawHeight } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
|
|
118
|
+
const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
|
|
118
119
|
const x = pageWidth - (imagesPerPage - i) * cellWidth + (cellWidth - drawWidth) * handoutImageRatio;
|
|
119
120
|
const y = pageHeight - drawHeight - offset;
|
|
120
121
|
return {
|
|
@@ -122,6 +123,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
|
|
|
122
123
|
y,
|
|
123
124
|
width: drawWidth,
|
|
124
125
|
height: drawHeight,
|
|
126
|
+
containerWidth,
|
|
125
127
|
};
|
|
126
128
|
}
|
|
127
129
|
})();
|
|
@@ -136,7 +138,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
|
|
|
136
138
|
for (const [index, line] of lines.entries()) {
|
|
137
139
|
page.drawText(line, {
|
|
138
140
|
...pos,
|
|
139
|
-
x:
|
|
141
|
+
x: offset + pos.containerWidth + textMargin,
|
|
140
142
|
y: pos.y + pos.height - fontSize - (fontSize + 2) * index,
|
|
141
143
|
size: fontSize,
|
|
142
144
|
font,
|
package/lib/actions/translate.js
CHANGED
|
@@ -7,7 +7,7 @@ import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/strin
|
|
|
7
7
|
import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
|
|
8
8
|
import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
|
|
9
9
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
10
|
-
const
|
|
10
|
+
const vanillaAgents = agents.default ?? agents;
|
|
11
11
|
const translateGraph = {
|
|
12
12
|
version: 0.5,
|
|
13
13
|
nodes: {
|